From f7f068cf59a928471330ae5f37f8bb4401872cb3 Mon Sep 17 00:00:00 2001 From: Jeff Gentry Date: Fri, 2 Dec 2016 20:02:46 -0500 Subject: [PATCH] Release 23 (#1735) --- .travis.yml | 14 +- CHANGELOG.md | 119 +++ MakingABackend.MD | 197 +++++ README.md | 621 ++++++++++++++- SecurityRecommendations.md | 51 ++ .../backend/BackendCacheHitCopyingActor.scala | 4 +- .../cromwell/backend/BackendJobBreadCrumb.scala | 14 + .../backend/BackendJobExecutionActor.scala | 15 +- .../cromwell/backend/BackendLifecycleActor.scala | 6 +- .../backend/BackendLifecycleActorFactory.scala | 26 +- .../BackendWorkflowInitializationActor.scala | 13 +- .../scala/cromwell/backend/OutputEvaluator.scala | 31 - .../backend/RuntimeAttributeDefinition.scala | 4 +- .../async/AsyncBackendJobExecutionActor.scala | 10 +- .../cromwell/backend/async/ExecutionHandle.scala | 6 +- .../cromwell/backend/async/ExecutionResult.scala | 8 +- .../src/main/scala/cromwell/backend/backend.scala | 30 +- .../backend/callcaching/CacheHitDuplicating.scala | 30 +- .../main/scala/cromwell/backend/io/JobPaths.scala | 70 +- .../cromwell/backend/io/JobPathsWithDocker.scala | 39 + .../scala/cromwell/backend/io/WorkflowPaths.scala | 35 +- .../WorkflowPathsBackendInitializationData.scala | 7 +- .../backend/io/WorkflowPathsWithDocker.scala | 16 + .../src/main/scala/cromwell/backend/package.scala | 12 +- .../validation/RuntimeAttributesValidation.scala | 5 +- .../main/scala/cromwell/backend/wdl/Command.scala | 31 + .../scala/cromwell/backend/wdl/FileSystems.scala | 30 - .../cromwell/backend/wdl/OutputEvaluator.scala | 19 + .../scala/cromwell/backend/wdl/PureFunctions.scala | 60 -- .../cromwell/backend/wdl/ReadLikeFunctions.scala | 7 +- .../cromwell/backend/wdl/WriteFunctions.scala | 12 +- .../wfs/DefaultWorkflowFileSystemProvider.scala | 9 - .../backend/wfs/DefaultWorkflowPathBuilder.scala | 8 + .../backend/wfs/WorkflowFileSystemProvider.scala | 34 - .../cromwell/backend/wfs/WorkflowPathBuilder.scala | 25 + 
.../test/scala/cromwell/backend/BackendSpec.scala | 58 +- .../scala/cromwell/backend/io/JobPathsSpec.scala | 40 +- .../scala/cromwell/backend/io/TestWorkflows.scala | 14 +- .../cromwell/backend/io/WorkflowPathsSpec.scala | 52 +- .../validation/RuntimeAttributesDefaultSpec.scala | 2 +- .../wdl/PureStandardLibraryFunctionsSpec.scala | 30 + build.sbt | 1 + core/src/main/resources/logback.xml | 88 +++ core/src/main/resources/reference.conf | 38 +- core/src/main/scala/cromwell/core/CallKey.scala | 7 + .../main/scala/cromwell/core/ExecutionStatus.scala | 2 +- .../main/scala/cromwell/core/ExecutionStore.scala | 13 - core/src/main/scala/cromwell/core/JobKey.scala | 6 +- .../src/main/scala/cromwell/core/OutputStore.scala | 40 - .../src/main/scala/cromwell/core/PathFactory.scala | 135 ---- .../scala/cromwell/core/WorkflowMetadataKeys.scala | 2 + .../scala/cromwell/core/WorkflowSourceFiles.scala | 9 - .../core/WorkflowSourceFilesCollection.scala | 43 ++ .../core/callcaching/CallCachingMode.scala | 6 +- .../scala/cromwell/core/logging/JobLogger.scala | 2 +- .../cromwell/core/logging/WorkflowLogger.scala | 8 +- core/src/main/scala/cromwell/core/package.scala | 2 +- .../cromwell/core/path/CustomRetryParams.scala | 25 + .../cromwell/core/path/DefaultPathBuilder.scala | 21 + .../core/path/DefaultPathBuilderFactory.scala | 8 + .../cromwell/core/path/JavaWriterImplicits.scala | 13 + .../scala/cromwell/core/path/PathBuilder.scala | 10 + .../cromwell/core/path/PathBuilderFactory.scala | 11 + .../cromwell/core/{ => path}/PathCopier.scala | 23 +- .../scala/cromwell/core/path/PathFactory.scala | 57 ++ .../scala/cromwell/core/path/PathImplicits.scala | 15 + .../cromwell/core/path/PathParsingException.scala | 5 + .../main/scala/cromwell/core/path/PathWriter.scala | 76 ++ .../cromwell/core/path/proxy/FileSystemProxy.scala | 25 + .../scala/cromwell/core/path/proxy/PathProxy.scala | 44 ++ .../proxy/RetryableFileSystemProviderProxy.scala | 57 ++ .../main/scala/cromwell/core/retry/Backoff.scala 
| 2 +- .../src/main/scala/cromwell/core/retry/Retry.scala | 4 +- .../cromwell/core/simpleton/WdlValueBuilder.scala | 32 +- .../core/simpleton/WdlValueSimpleton.scala | 2 + .../JsonFormatting/WdlValueJsonFormatter.scala | 1 + .../main/scala/cromwell/util/PromiseActor.scala | 38 +- .../scala/cromwell/util/StopAndLogSupervisor.scala | 24 + core/src/main/scala/cromwell/util/TryUtil.scala | 45 ++ .../core/path/RetryableFileSystemProxySpec.scala | 278 +++++++ .../core/simpleton/WdlValueBuilderSpec.scala | 129 +++- core/src/test/scala/cromwell/util/SampleWdl.scala | 19 +- .../cromwell/util/WdlValueJsonFormatterSpec.scala | 28 + .../migration/src/main/resources/changelog.xml | 5 + .../changesets/embiggen_metadata_value.xml | 50 ++ .../encrypt_and_clear_workflow_options.xml | 14 + .../rename_workflow_options_in_metadata.xml | 10 + .../resources/changesets/sub_workflow_store.xml | 62 ++ .../changesets/workflow_store_imports_file.xml | 15 + database/migration/src/main/resources/logback.xml | 36 - .../migration/custom/BatchedTaskChange.scala | 139 ++++ .../migration/custom/MigrationTaskChange.scala | 48 ++ .../table/symbol => custom}/QueryPaginator.scala | 2 +- .../symbol => custom}/ResultSetIterator.scala | 2 +- .../symbol/CallOutputSymbolTableMigration.scala | 1 - .../table/symbol/InputSymbolTableMigration.scala | 1 - .../metadata/table/symbol/MetadataStatement.scala | 5 +- .../table/symbol/SymbolTableMigration.scala | 109 +-- .../WorkflowOutputSymbolTableMigration.scala | 1 - .../table/RenameWorkflowOptionKeysMigration.scala | 18 +- .../ClearMetadataEntryWorkflowOptions.scala | 15 + .../EncryptWorkflowStoreEntryWorkflowOptions.scala | 14 + .../RenameWorkflowOptionsInMetadata.scala | 38 + .../workflowoptions/WorkflowOptionsChange.scala | 69 ++ .../workflowoptions/WorkflowOptionsRenaming.scala | 21 + .../database/slick/CallCachingSlickDatabase.scala | 1 + .../cromwell/database/slick/SlickDatabase.scala | 3 +- .../slick/SubWorkflowStoreSlickDatabase.scala | 67 ++ 
.../slick/tables/DataAccessComponent.scala | 6 +- .../tables/SubWorkflowStoreEntryComponent.scala | 62 ++ .../slick/tables/WorkflowStoreEntryComponent.scala | 6 +- .../scala/cromwell/database/sql/SqlDatabase.scala | 3 +- .../database/sql/SubWorkflowStoreSqlDatabase.scala | 21 + .../sql/tables/SubWorkflowStoreEntry.scala | 12 + .../database/sql/tables/WorkflowStoreEntry.scala | 3 +- engine/src/main/resources/logback.xml | 36 - engine/src/main/resources/swagger/cromwell.yaml | 17 + .../resources/workflowTimings/workflowTimings.html | 184 +++-- .../scala/cromwell/engine/EngineFilesystems.scala | 47 +- .../cromwell/engine/EngineWorkflowDescriptor.scala | 26 +- .../main/scala/cromwell/engine/WdlFunctions.scala | 12 +- .../engine/backend/EnhancedWorkflowOptions.scala | 16 - .../workflow/SingleWorkflowRunnerActor.scala | 258 ++++--- .../cromwell/engine/workflow/WorkflowActor.scala | 83 +- .../engine/workflow/WorkflowManagerActor.scala | 102 ++- .../workflow/lifecycle/CopyWorkflowLogsActor.scala | 3 +- .../lifecycle/CopyWorkflowOutputsActor.scala | 46 +- .../MaterializeWorkflowDescriptorActor.scala | 152 +++- .../lifecycle/WorkflowFinalizationActor.scala | 30 +- .../lifecycle/WorkflowInitializationActor.scala | 4 +- .../lifecycle/execution/CallMetadataHelper.scala | 135 ++++ .../execution/EngineJobExecutionActor.scala | 137 ++-- .../lifecycle/execution/ExecutionStore.scala | 109 +++ .../lifecycle/execution/JobPreparationActor.scala | 151 ++-- .../workflow/lifecycle/execution/OutputStore.scala | 98 +++ .../execution/SubWorkflowExecutionActor.scala | 275 +++++++ .../workflow/lifecycle/execution/WdlLookup.scala | 106 --- .../execution/WorkflowExecutionActor.scala | 858 +++++++++++---------- .../execution/WorkflowExecutionActorData.scala | 118 ++- .../execution/WorkflowMetadataHelper.scala | 37 + .../execution/callcaching/CallCache.scala | 10 +- .../callcaching/CallCacheInvalidateActor.scala | 1 + .../callcaching/CallCacheWriteActor.scala | 6 +- 
.../callcaching/EngineJobHashingActor.scala | 4 +- .../workflow/lifecycle/execution/package.scala | 12 +- .../tokens/JobExecutionTokenDispenserActor.scala | 2 +- .../workflowstore/InMemoryWorkflowStore.scala | 6 +- .../workflow/workflowstore/SqlWorkflowStore.scala | 28 +- .../workflow/workflowstore/WorkflowStore.scala | 4 +- .../workflowstore/WorkflowStoreActor.scala | 48 +- .../workflow/workflowstore/workflowstore_.scala | 4 +- .../cromwell/jobstore/EmptyJobStoreActor.scala | 15 + .../main/scala/cromwell/jobstore/jobstore_.scala | 2 +- .../scala/cromwell/server/CromwellRootActor.scala | 10 +- .../scala/cromwell/server/CromwellServer.scala | 3 + .../EmptySubWorkflowStoreActor.scala | 17 + .../subworkflowstore/SqlSubWorkflowStore.scala | 31 + .../subworkflowstore/SubWorkflowStore.scala | 19 + .../subworkflowstore/SubWorkflowStoreActor.scala | 72 ++ .../cromwell/webservice/CromwellApiHandler.scala | 8 +- .../cromwell/webservice/CromwellApiService.scala | 122 ++- .../cromwell/webservice/EngineStatsActor.scala | 10 +- .../cromwell/webservice/WorkflowJsonSupport.scala | 16 +- .../webservice/metadata/IndexedJsonValue.scala | 22 +- .../webservice/metadata/MetadataBuilderActor.scala | 122 ++- .../scala/cromwell/ArrayOfArrayCoercionSpec.scala | 4 +- .../test/scala/cromwell/ArrayWorkflowSpec.scala | 18 +- .../scala/cromwell/CallCachingWorkflowSpec.scala | 2 +- .../scala/cromwell/CopyWorkflowOutputsSpec.scala | 6 +- ...TestkitSpec.scala => CromwellTestKitSpec.scala} | 44 +- .../scala/cromwell/DeclarationWorkflowSpec.scala | 12 +- .../scala/cromwell/FilePassingWorkflowSpec.scala | 14 +- .../src/test/scala/cromwell/MapWorkflowSpec.scala | 16 +- .../test/scala/cromwell/MetadataWatchActor.scala | 20 +- .../MultipleFilesWithSameNameWorkflowSpec.scala | 6 +- .../scala/cromwell/OptionalParamWorkflowSpec.scala | 12 +- .../cromwell/PostfixQuantifierWorkflowSpec.scala | 12 +- .../test/scala/cromwell/RestartWorkflowSpec.scala | 6 +- .../test/scala/cromwell/ScatterWorkflowSpec.scala | 40 
+- .../scala/cromwell/SimpleWorkflowActorSpec.scala | 32 +- .../cromwell/WdlFunctionsAtWorkflowLevelSpec.scala | 6 +- .../test/scala/cromwell/WorkflowFailSlowSpec.scala | 2 +- .../test/scala/cromwell/WorkflowOutputsSpec.scala | 20 +- .../cromwell/engine/EngineFunctionsSpec.scala | 12 +- .../scala/cromwell/engine/WorkflowAbortSpec.scala | 4 +- .../cromwell/engine/WorkflowManagerActorSpec.scala | 6 +- .../cromwell/engine/WorkflowStoreActorSpec.scala | 81 +- .../mock/DefaultBackendJobExecutionActor.scala | 8 +- .../mock/RetryableBackendJobExecutionActor.scala | 6 +- .../RetryableBackendLifecycleActorFactory.scala | 4 +- .../cromwell/engine/backend/mock/package.scala | 2 +- .../workflow/SingleWorkflowRunnerActorSpec.scala | 29 +- .../engine/workflow/WorkflowActorSpec.scala | 26 +- .../workflow/WorkflowDescriptorBuilder.scala | 10 +- .../MaterializeWorkflowDescriptorActorSpec.scala | 68 +- .../execution/SubWorkflowExecutionActorSpec.scala | 213 +++++ .../execution/WorkflowExecutionActorSpec.scala | 14 +- .../callcaching/EngineJobHashingActorSpec.scala | 22 +- .../EjeaBackendIsCopyingCachedOutputsSpec.scala | 39 +- .../execution/ejea/EjeaCheckingJobStoreSpec.scala | 14 +- .../execution/ejea/EjeaPreparingJobSpec.scala | 14 +- .../ejea/EjeaRequestingExecutionTokenSpec.scala | 4 +- .../execution/ejea/EjeaUpdatingJobStoreSpec.scala | 4 +- .../ejea/EngineJobExecutionActorSpec.scala | 4 +- .../ejea/EngineJobExecutionActorSpecUtil.scala | 10 +- .../lifecycle/execution/ejea/PerTestHelper.scala | 83 +- .../engine/workflow/mocks/DeclarationMock.scala | 21 + .../cromwell/engine/workflow/mocks/TaskMock.scala | 27 + .../engine/workflow/mocks/WdlExpressionMock.scala | 32 + .../cromwell/jobstore/JobStoreServiceSpec.scala | 13 +- .../cromwell/jobstore/JobStoreWriterSpec.scala | 4 +- .../subworkflowstore/SubWorkflowStoreSpec.scala | 87 +++ .../webservice/CromwellApiServiceSpec.scala | 14 +- .../cromwell/webservice/EngineStatsActorSpec.scala | 10 +- 
.../webservice/MetadataBuilderActorSpec.scala | 115 ++- .../filesystems/gcs/ContentTypeOption.scala | 15 - .../filesystems/gcs/GcsFileAttributes.scala | 23 - .../cromwell/filesystems/gcs/GcsFileSystem.scala | 68 -- .../filesystems/gcs/GcsFileSystemProvider.scala | 295 ------- .../cromwell/filesystems/gcs/GcsPathBuilder.scala | 100 +++ .../filesystems/gcs/GcsPathBuilderFactory.scala | 48 ++ .../cromwell/filesystems/gcs/GoogleAuthMode.scala | 186 ----- .../filesystems/gcs/GoogleConfiguration.scala | 14 +- .../cromwell/filesystems/gcs/NioGcsPath.scala | 191 ----- .../filesystems/gcs/auth/GoogleAuthMode.scala | 187 +++++ .../gcs/auth/RefreshableOAuth2Credentials.scala | 31 + .../scala/cromwell/filesystems/gcs/package.scala | 6 - .../filesystems/gcs/GcsIntegrationTest.scala | 5 - .../filesystems/gcs/GcsPathBuilderSpec.scala | 31 + .../filesystems/gcs/GoogleConfigurationSpec.scala | 16 +- .../gcs/GoogleCredentialFactorySpec.scala | 158 ---- .../filesystems/gcs/MockGcsFileSystemBuilder.scala | 9 - .../cromwell/filesystems/gcs/NioGcsPathSpec.scala | 291 ------- .../filesystems/gcs/RefreshTokenModeSpec.scala | 26 - project/Dependencies.scala | 53 +- project/Merging.scala | 5 + project/Settings.scala | 31 +- project/Testing.scala | 20 +- project/Version.scala | 7 +- .../services/metadata/CallMetadataKeys.scala | 2 + .../cromwell/services/metadata/MetadataQuery.scala | 11 +- .../services/metadata/MetadataService.scala | 3 +- .../metadata/impl/MetadataDatabaseAccess.scala | 14 +- .../services/metadata/impl/ReadMetadataActor.scala | 11 +- .../cromwell/services/ServicesStoreSpec.scala | 1 + src/bin/travis/afterSuccess.sh | 20 + src/bin/travis/publishSnapshot.sh | 11 - src/bin/travis/resources/centaur.inputs | 10 +- src/bin/travis/resources/centaur.wdl | 8 +- src/bin/travis/resources/local_centaur.conf | 3 + src/bin/travis/test.sh | 2 + src/bin/travis/testCentaurJes.sh | 6 +- src/bin/travis/testCentaurLocal.sh | 4 +- src/bin/travis/testCheckPublish.sh | 6 + 
src/main/scala/cromwell/CromwellCommandLine.scala | 14 +- src/main/scala/cromwell/Main.scala | 16 +- .../scala/cromwell/CromwellCommandLineSpec.scala | 23 +- .../impl/htcondor/HtCondorBackendFactory.scala | 10 +- .../htcondor/HtCondorInitializationActor.scala | 6 +- .../impl/htcondor/HtCondorJobExecutionActor.scala | 52 +- .../impl/htcondor/HtCondorRuntimeAttributes.scala | 27 +- .../backend/impl/htcondor/HtCondorWrapper.scala | 27 +- .../backend/impl/htcondor/caching/CacheActor.scala | 6 +- .../localization/CachedResultLocalization.scala | 2 +- .../caching/model/CachedExecutionResult.scala | 4 +- .../caching/provider/mongodb/MongoCacheActor.scala | 4 +- .../impl/htcondor/HtCondorCommandSpec.scala | 16 +- .../htcondor/HtCondorInitializationActorSpec.scala | 8 +- .../htcondor/HtCondorJobExecutionActorSpec.scala | 48 +- .../htcondor/HtCondorRuntimeAttributesSpec.scala | 50 +- .../CachedResultLocalizationSpec.scala | 4 +- .../provider/mongodb/MongoCacheActorSpec.scala | 8 +- .../backend/impl/jes/GenomicsFactory.scala | 24 +- .../jes/JesAsyncBackendJobExecutionActor.scala | 276 +++---- .../cromwell/backend/impl/jes/JesAttributes.scala | 24 +- .../impl/jes/JesBackendLifecycleActorFactory.scala | 29 +- .../impl/jes/JesBackendSingletonActor.scala | 8 +- .../backend/impl/jes/JesCacheHitCopyingActor.scala | 6 +- .../cromwell/backend/impl/jes/JesCallPaths.scala | 82 -- .../backend/impl/jes/JesConfiguration.scala | 28 +- .../backend/impl/jes/JesExpressionFunctions.scala | 38 +- .../backend/impl/jes/JesFinalizationActor.scala | 31 +- .../cromwell/backend/impl/jes/JesImplicits.scala | 41 - .../backend/impl/jes/JesInitializationActor.scala | 37 +- .../impl/jes/JesJobCachingActorHelper.scala | 44 +- .../backend/impl/jes/JesJobExecutionActor.scala | 25 +- .../cromwell/backend/impl/jes/JesJobPaths.scala | 60 ++ .../backend/impl/jes/JesWorkflowPaths.scala | 62 +- .../main/scala/cromwell/backend/impl/jes/Run.scala | 7 +- .../backend/impl/jes/authentication/JesAuths.scala | 5 + 
.../impl/jes/authentication/JesCredentials.scala | 5 - .../jes/authentication/JesVMAuthentication.scala | 2 +- .../jes/callcaching/JesBackendFileHashing.scala | 4 +- .../cromwell/backend/impl/jes/io/package.scala | 9 +- .../jes/statuspolling/JesApiQueryManager.scala | 22 +- .../impl/jes/statuspolling/JesPollingActor.scala | 46 +- .../jes/statuspolling/JesPollingActorClient.scala | 5 +- .../jes/JesAsyncBackendJobExecutionActorSpec.scala | 145 ++-- .../backend/impl/jes/JesAttributesSpec.scala | 20 +- .../backend/impl/jes/JesCallPathsSpec.scala | 37 +- .../backend/impl/jes/JesConfigurationSpec.scala | 19 +- .../impl/jes/JesInitializationActorSpec.scala | 20 +- .../impl/jes/JesJobExecutionActorSpec.scala | 111 +++ .../cromwell/backend/impl/jes/JesTestConfig.scala | 2 +- .../backend/impl/jes/JesWorkflowPathsSpec.scala | 18 +- .../cromwell/backend/impl/jes/MockObjects.scala | 9 - .../scala/cromwell/backend/impl/jes/RunSpec.scala | 6 +- .../jes/statuspolling/JesApiQueryManagerSpec.scala | 63 +- .../jes/statuspolling/JesPollingActorSpec.scala | 19 +- .../sfs/config/ConfigAsyncJobExecutionActor.scala | 23 +- .../impl/sfs/config/ConfigBackendFileHashing.scala | 12 +- .../impl/sfs/config/ConfigHashingStrategy.scala | 26 +- .../impl/sfs/config/ConfigWdlNamespace.scala | 4 +- .../impl/sfs/config/DeclarationValidation.scala | 18 +- .../sfs/GcsWorkflowFileSystemProvider.scala | 36 - .../cromwell/backend/sfs/SharedFileSystem.scala | 60 +- .../SharedFileSystemAsyncJobExecutionActor.scala | 65 +- ...redFileSystemBackendLifecycleActorFactory.scala | 28 +- .../sfs/SharedFileSystemCacheHitCopyingActor.scala | 7 +- .../sfs/SharedFileSystemExpressionFunctions.scala | 28 +- .../sfs/SharedFileSystemInitializationActor.scala | 18 +- .../SharedFileSystemJobCachingActorHelper.scala | 5 +- .../sfs/config/ConfigHashingStrategySpec.scala | 10 + .../SharedFileSystemInitializationActorSpec.scala | 10 +- .../SharedFileSystemJobExecutionActorSpec.scala | 53 +- 
.../backend/sfs/SharedFileSystemSpec.scala | 20 +- .../sfs/TestLocalAsyncJobExecutionActor.scala | 4 +- .../backend/impl/spark/SparkBackendFactory.scala | 10 +- .../impl/spark/SparkInitializationActor.scala | 6 +- .../impl/spark/SparkJobExecutionActor.scala | 44 +- .../cromwell/backend/impl/spark/SparkProcess.scala | 23 +- .../impl/spark/SparkInitializationActorSpec.scala | 7 +- .../impl/spark/SparkJobExecutionActorSpec.scala | 50 +- .../impl/spark/SparkRuntimeAttributesSpec.scala | 11 +- 334 files changed, 8400 insertions(+), 4793 deletions(-) create mode 100644 MakingABackend.MD create mode 100644 SecurityRecommendations.md create mode 100644 backend/src/main/scala/cromwell/backend/BackendJobBreadCrumb.scala delete mode 100644 backend/src/main/scala/cromwell/backend/OutputEvaluator.scala create mode 100644 backend/src/main/scala/cromwell/backend/io/JobPathsWithDocker.scala create mode 100644 backend/src/main/scala/cromwell/backend/io/WorkflowPathsWithDocker.scala create mode 100644 backend/src/main/scala/cromwell/backend/wdl/Command.scala delete mode 100644 backend/src/main/scala/cromwell/backend/wdl/FileSystems.scala create mode 100644 backend/src/main/scala/cromwell/backend/wdl/OutputEvaluator.scala delete mode 100644 backend/src/main/scala/cromwell/backend/wdl/PureFunctions.scala delete mode 100644 backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowFileSystemProvider.scala create mode 100644 backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowPathBuilder.scala delete mode 100644 backend/src/main/scala/cromwell/backend/wfs/WorkflowFileSystemProvider.scala create mode 100644 backend/src/main/scala/cromwell/backend/wfs/WorkflowPathBuilder.scala create mode 100644 backend/src/test/scala/cromwell/backend/wdl/PureStandardLibraryFunctionsSpec.scala create mode 100644 core/src/main/resources/logback.xml create mode 100644 core/src/main/scala/cromwell/core/CallKey.scala delete mode 100644 core/src/main/scala/cromwell/core/ExecutionStore.scala delete mode 
100644 core/src/main/scala/cromwell/core/OutputStore.scala delete mode 100644 core/src/main/scala/cromwell/core/PathFactory.scala delete mode 100644 core/src/main/scala/cromwell/core/WorkflowSourceFiles.scala create mode 100644 core/src/main/scala/cromwell/core/WorkflowSourceFilesCollection.scala create mode 100644 core/src/main/scala/cromwell/core/path/CustomRetryParams.scala create mode 100644 core/src/main/scala/cromwell/core/path/DefaultPathBuilder.scala create mode 100644 core/src/main/scala/cromwell/core/path/DefaultPathBuilderFactory.scala create mode 100644 core/src/main/scala/cromwell/core/path/JavaWriterImplicits.scala create mode 100644 core/src/main/scala/cromwell/core/path/PathBuilder.scala create mode 100644 core/src/main/scala/cromwell/core/path/PathBuilderFactory.scala rename core/src/main/scala/cromwell/core/{ => path}/PathCopier.scala (65%) create mode 100644 core/src/main/scala/cromwell/core/path/PathFactory.scala create mode 100644 core/src/main/scala/cromwell/core/path/PathImplicits.scala create mode 100644 core/src/main/scala/cromwell/core/path/PathParsingException.scala create mode 100644 core/src/main/scala/cromwell/core/path/PathWriter.scala create mode 100644 core/src/main/scala/cromwell/core/path/proxy/FileSystemProxy.scala create mode 100644 core/src/main/scala/cromwell/core/path/proxy/PathProxy.scala create mode 100644 core/src/main/scala/cromwell/core/path/proxy/RetryableFileSystemProviderProxy.scala create mode 100644 core/src/main/scala/cromwell/util/StopAndLogSupervisor.scala create mode 100644 core/src/main/scala/cromwell/util/TryUtil.scala create mode 100644 core/src/test/scala/cromwell/core/path/RetryableFileSystemProxySpec.scala create mode 100644 core/src/test/scala/cromwell/util/WdlValueJsonFormatterSpec.scala create mode 100644 database/migration/src/main/resources/changesets/embiggen_metadata_value.xml create mode 100644 database/migration/src/main/resources/changesets/encrypt_and_clear_workflow_options.xml create mode 
100644 database/migration/src/main/resources/changesets/rename_workflow_options_in_metadata.xml create mode 100644 database/migration/src/main/resources/changesets/sub_workflow_store.xml create mode 100644 database/migration/src/main/resources/changesets/workflow_store_imports_file.xml delete mode 100644 database/migration/src/main/resources/logback.xml create mode 100644 database/migration/src/main/scala/cromwell/database/migration/custom/BatchedTaskChange.scala create mode 100644 database/migration/src/main/scala/cromwell/database/migration/custom/MigrationTaskChange.scala rename database/migration/src/main/scala/cromwell/database/migration/{metadata/table/symbol => custom}/QueryPaginator.scala (88%) rename database/migration/src/main/scala/cromwell/database/migration/{metadata/table/symbol => custom}/ResultSetIterator.scala (73%) create mode 100644 database/migration/src/main/scala/cromwell/database/migration/workflowoptions/ClearMetadataEntryWorkflowOptions.scala create mode 100644 database/migration/src/main/scala/cromwell/database/migration/workflowoptions/EncryptWorkflowStoreEntryWorkflowOptions.scala create mode 100644 database/migration/src/main/scala/cromwell/database/migration/workflowoptions/RenameWorkflowOptionsInMetadata.scala create mode 100644 database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsChange.scala create mode 100644 database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsRenaming.scala create mode 100644 database/sql/src/main/scala/cromwell/database/slick/SubWorkflowStoreSlickDatabase.scala create mode 100644 database/sql/src/main/scala/cromwell/database/slick/tables/SubWorkflowStoreEntryComponent.scala create mode 100644 database/sql/src/main/scala/cromwell/database/sql/SubWorkflowStoreSqlDatabase.scala create mode 100644 database/sql/src/main/scala/cromwell/database/sql/tables/SubWorkflowStoreEntry.scala delete mode 100644 engine/src/main/resources/logback.xml 
delete mode 100644 engine/src/main/scala/cromwell/engine/backend/EnhancedWorkflowOptions.scala create mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala create mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStore.scala create mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/OutputStore.scala create mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala delete mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WdlLookup.scala create mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala rename engine/src/{test => main}/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala (88%) create mode 100644 engine/src/main/scala/cromwell/jobstore/EmptyJobStoreActor.scala create mode 100644 engine/src/main/scala/cromwell/subworkflowstore/EmptySubWorkflowStoreActor.scala create mode 100644 engine/src/main/scala/cromwell/subworkflowstore/SqlSubWorkflowStore.scala create mode 100644 engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStore.scala create mode 100644 engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStoreActor.scala rename engine/src/test/scala/cromwell/{CromwellTestkitSpec.scala => CromwellTestKitSpec.scala} (94%) create mode 100644 engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala create mode 100644 engine/src/test/scala/cromwell/engine/workflow/mocks/DeclarationMock.scala create mode 100644 engine/src/test/scala/cromwell/engine/workflow/mocks/TaskMock.scala create mode 100644 engine/src/test/scala/cromwell/engine/workflow/mocks/WdlExpressionMock.scala create mode 100644 engine/src/test/scala/cromwell/subworkflowstore/SubWorkflowStoreSpec.scala delete mode 100644 
filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/ContentTypeOption.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileAttributes.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystem.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystemProvider.scala create mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala create mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleAuthMode.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/NioGcsPath.scala create mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala create mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/RefreshableOAuth2Credentials.scala delete mode 100644 filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/package.scala delete mode 100644 filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsIntegrationTest.scala create mode 100644 filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala delete mode 100644 filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleCredentialFactorySpec.scala delete mode 100644 filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/MockGcsFileSystemBuilder.scala delete mode 100644 filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/NioGcsPathSpec.scala delete mode 100644 filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/RefreshTokenModeSpec.scala create mode 100755 src/bin/travis/afterSuccess.sh delete mode 100755 src/bin/travis/publishSnapshot.sh create mode 100755 src/bin/travis/testCheckPublish.sh delete mode 100644 supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCallPaths.scala delete mode 100644 
supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesImplicits.scala create mode 100644 supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala create mode 100644 supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesAuths.scala delete mode 100644 supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesCredentials.scala create mode 100644 supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala delete mode 100644 supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/MockObjects.scala delete mode 100644 supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/GcsWorkflowFileSystemProvider.scala diff --git a/.travis.yml b/.travis.yml index 5784eaf62..f4d0d9b2e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,14 +6,18 @@ scala: jdk: - oraclejdk8 env: - # Setting this variable twice will cause the 'script' section to run twice with the respective env var invoked - - BUILD_TYPE=sbt - - BUILD_TYPE=centaurJes - - BUILD_TYPE=centaurLocal + global: + - CENTAUR_BRANCH=develop + matrix: + # Setting this variable twice will cause the 'script' section to run twice with the respective env var invoked + - BUILD_TYPE=sbt + - BUILD_TYPE=checkPublish + - BUILD_TYPE=centaurJes + - BUILD_TYPE=centaurLocal script: - src/bin/travis/test.sh after_success: - - src/bin/travis/publishSnapshot.sh + - src/bin/travis/afterSuccess.sh deploy: provider: script script: src/bin/travis/publishRelease.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index cf48b60be..3f7b649ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,124 @@ # Cromwell Change Log +## 23 + +* The `meta` and `parameter_meta` blocks are now valid within `workflow` blocks, not just `task` +* The JES backend configuration now has an option `genomics-api-queries-per-100-seconds` to help tune the rate of batch polling against the JES servers. 
Users with quotas larger than default should make sure to set this value. +* Added an option `call-caching.invalidate-bad-cache-results` (default: `true`). If true, Cromwell will invalidate cached results which have failed to copy as part of a cache hit. +* Timing diagrams and metadata now receive more fine grained workflow states between submission and Running. +* Support for the Pair WDL type (e.g. `Pair[Int, File] floo = (3, "gs://blar/blaz/qlux.txt")`) +* Added support for new WDL functions: + * `zip: (Array[X], Array[Y]) => Array[Pair[X, Y]]` - align items in the two arrays by index and return them as WDL pairs + * `cross: (Array[X], Array[Y]) => Array[Pair[X, Y]]` - create every possible pair from the two input arrays and return them all as WDL pairs + * `transpose: (Array[Array[X]]) => Array[Array[X]]` compute the matrix transpose for a 2D array. Assumes each inner array has the same length. +* By default, `system.abort-jobs-on-terminate` is false when running `java -jar cromwell.jar server`, and true when running `java -jar cromwell.jar run `. +* Enable WDL imports when running in Single Workflow Runner Mode. +* Support for sub workflows (see [Annex A](#annex-a---workflow-outputs)) +* Enable WDL imports when running in Single Workflow Runner Mode as well as Server Mode +* Support for WDL imports through an additional imports.zip parameter +* Support for sub workflows +* Corrected file globbing in JES to correctly report all generated files. Additionally, file globbing in JES now uses bash-style glob syntax instead of python style glob syntax +* Support declarations as graph nodes +* Added the ability to override the default service account that the compute VM is started with via the configuration option `JES.config.genomics.compute-service-account` or through the workflow options parameter `google_compute_service_account`. More details can be found in the README.md +* Fix bugs related to the behavior of Cromwell in Single Workflow Runner Mode. 
Cromwell will now exit once a workflow completes in Single Workflow Runner Mode. Additionally, when restarting Cromwell in Single Workflow Runner Mode, Cromwell will no longer restart incomplete workflows from a previous session. + +### Annex A - Workflow outputs + +The WDL specification has changed regarding [workflow outputs](https://github.com/broadinstitute/wdl/blob/develop/SPEC.md#outputs) to accommodate sub workflows. +This change is backward compatible in terms of runnable WDLs (WDL files using the deprecated workflow outputs syntax will still run the same). +The only visible change lies in the metadata (as well as the console output in single workflow mode, when workflow outputs are printed out at the end of a successful workflow). + +TL;DR Unless you are parsing or manipulating the "key" by which workflow outputs are referenced in the metadata (and/or the console output for single workflow mode), you can skip the following explanation. + +*Metadata Response* +``` +{ + ... + outputs { + "task_output_1": "hello", + "task_output_2": "world" + ^ + If you don't manipulate this part of the metadata, then skip this section + } +} +``` + +In order to maintain backward compatibility, workflow outputs expressed with the deprecated syntax are "expanded" to the new syntax. Here is an example: + +``` +task t { + command { + #do something + } + output { + String out1 = "hello" + String out2 = "world" + } +} +``` + +``` + workflow old_syntax { + call t + output { + t.* + } + } +``` + +``` + workflow new_syntax { + call t + output { + String wf_out1 = t.out1 + String wf_out2 = t.out2 + } + } +``` + +The new syntax allows for type checking of the outputs as well as expressions. It also allows for explicitly naming the outputs. +The old syntax doesn't give the ability to name workflow outputs. For consistency reasons, Cromwell will generate a "new syntax" workflow output for each task output, and name them. 
+Their name will be generated using their FQN, which would give + +``` +output { + String w.t.out1 = t.out1 + String w.t.out2 = t.out2 +} +``` + +However, as the FQN separator is `.`, the name itself cannot contain any `.`. +For that reason, `.`s are replaced with `_`: + +*Old syntax expanded to new syntax* +``` +output { + String w_t_out1 = t.out1 + String w_t_out2 = t.out2 +} +``` + +The consequence is that the workflow outputs section of the metadata for `old_syntax` would previously look like + + ``` + outputs { + "w.t.out1": "hello", + "w.t.out2": "world" + } + ``` + +but it will now look like + +``` + outputs { + "w_t_out1": "hello", + "w_t_out2": "world" + } +``` + +The same applies for the console output of a workflow run in single workflow mode. + + ## 0.22 * Improved retries for Call Caching and general bug fixes. diff --git a/MakingABackend.MD b/MakingABackend.MD new file mode 100644 index 000000000..5106b3726 --- /dev/null +++ b/MakingABackend.MD @@ -0,0 +1,197 @@ +# Making a backend + +## Part 0: Introduction + +- These notes were added while making a new backend for Amazon Web Services (AWS). + +## Part 1 (October 13 2016): The skeleton: + +To start with, I just need to create a bunch of boilerplate which will eventually be filled in with all of the lovely AWS details! + +### Defining the awsBackend project: + +- Added entries to `project/Settings.scala`, `project/Dependencies.scala` and `build.sbt` +- This was mainly just a copy/paste from existing backend projects. I made a few typos renaming everything and linking the dependencies properly though! +- E.g. 
In my first commit I forgot to update the libraryDependencies name for my AWS backend project: +``` + val awsBackendSettings = List( + name := "cromwell-aws-backend", + libraryDependencies ++= awsBackendDependencies + ) ++ commonSettings +``` +- I guessed that I'd need the AWS SDK so I included that immediately in Dependencies.scala: +``` + val awsBackendDependencies = List( + "com.amazonaws" % "aws-java-sdk" % "1.11.41" + ) +``` +- In build.scala I had to also edit the `lazy val root` to include a new `.aggregate(awsBackend)` and a new `.dependsOn(awsBackend)` + +### Directory structure: + +- This is probably going to be autogenerated for you in the directories specified in the above files. I'd already added my own directory structure and sbt managed to pick it up correctly in `supportedBackends/aws`. + +### AWS Job Execution Actor: +- To run a job, Cromwell needs to instantiate a Job Execution actor. I'll fill in the details later but for now, I'll just add the constructor, props, and an unimplemented method definition for `execute`: +``` +class AwsJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, + override val configurationDescriptor: BackendConfigurationDescriptor) extends BackendJobExecutionActor { + + override def execute: Future[BackendJobExecutionResponse] = ??? +} + +object AwsJobExecutionActor { + def props(jobDescriptor: BackendJobDescriptor, + configurationDescriptor: BackendConfigurationDescriptor): Props = Props(new AwsJobExecutionActor(jobDescriptor, configurationDescriptor)) +} +``` + +### Actor factory: +- This is the class which tells Cromwell which classes represent job execution actors, initialization actors and so on. 
I'm just adding a skeleton for now, with a constructor of the form the Cromwell expects: +``` +case class AwsBackendActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { + + override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, + initializationData: Option[BackendInitializationData], + serviceRegistryActor: ActorRef, + backendSingletonActor: Option[ActorRef]): Props = AwsJobExecutionActor.props(jobDescriptor, configurationDescriptor) +} +``` +- There are a few other actor definitions that can be added to this file over time. But the only one that Cromwell *requires* to work is the job execution actor. + +### Reference conf: + +- Reference.conf is a set of reference options which shows people how to enable the backends that they want. So I'll add the initial config which people would add if they wanted the AWS backend (commented out in the reference so it's not enabled by default). This goes below all the other backend references: +``` + #AWS { + # actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory" + # config { + # + # } + #} +``` + +### Application.conf + +- OK so I've now told people how to add this backend... Now I actually add it to my own personal configuration file so I can try it out! +``` +backend { + default = "AWS" + providers { + AWS { + actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory" + config { + + } + } + } +} +``` + +### Trying it out +So we now have a backend skeleton! What happens when we run it? Well hopefully Cromwell will instantiate the backend far enough to reach the unimplemented execute method and then fall over. Let's give it a go! +- I fire up cromwell in server mode with my modified application.conf. 
+- I create a sample WDL that would sleep for 20 seconds if it actually worked: +The input WDL: +``` +task sleep { + command { sleep 20 } +} +workflow main { + call sleep +} +``` +- I submit the WDL to the swagger endpoint (http://localhost:8000/swagger/index.html?url=/swagger/cromwell.yaml) and watch the server logs... +- And as expected: +``` +2016-10-13 13:14:29,017 cromwell-system-akka.dispatchers.engine-dispatcher-39 INFO - MaterializeWorkflowDescriptorActor [UUID(ddd827ba)]: Call-to-Backend assignments: main.sleep -> AWS +2016-10-13 13:14:30,167 cromwell-system-akka.dispatchers.engine-dispatcher-39 INFO - WorkflowExecutionActor-ddd827ba-091f-4c6f-b98f-cc9825717007 [UUID(ddd827ba)]: Starting calls: main.sleep:NA:1 +2016-10-13 13:14:30,983 cromwell-system-akka.actor.default-dispatcher-5 ERROR - guardian failed, shutting down system +scala.NotImplementedError: an implementation is missing + at scala.Predef$.$qmark$qmark$qmark(Predef.scala:230) + at cromwell.backend.impl.aws.AwsJobExecutionActor.execute(AwsJobExecutionActor.scala:12) +``` +- OK, so now I just need to implement `execute(): Future[JobExecutionResult]` and Cromwell can interface with AWS. How hard can it be! + +## Part 2 (October 13 2016): Using Amazon to sleep 20 seconds + +### Starting point +- This was a learning experience after using the Google pipelines service to submit jobs! +- To get myself started, I've manually created an ECS cluster which I've called `ecs-t2micro-cluster` via the ECS web console. + +### Trial and Error + +- I see in the aws sdk docs that there's an AmazonECSAsyncClient class. That sounds promising! Luckily I already added the dependency on AWS SDK in Part 1 so I guess I can just write something basic in my AwsJobExecutionActor class and see what happens: + +- I ended up having to add some credentials options to the configuration file. 
The new `reference.conf` now looks like: +``` + #AWS { + # actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory" + # config { + # ## These two settings are required to authenticate with the ECS service: + # accessKeyId = "..." + # secretKey = "..." + # } + #} +``` + +- After a little bit of experimentation with the ECS API, I was able to come up with a backend that works but is very limited... It is entirely synchronous in the `execute` method. That's certainly not a final answer but it works OK for running a single task. And we can now run that single `sleep` command successfully on the Amazon EC2 Container Service! + - The synchronous `execute` method: +``` +class AwsJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, + override val configurationDescriptor: BackendConfigurationDescriptor) extends BackendJobExecutionActor { + + val awsAccessKeyId = configurationDescriptor.backendConfig.as[String]("accessKeyId") + val awsSecretKey = configurationDescriptor.backendConfig.as[String]("secretKey") + + val clusterName = "ecs-t2micro-cluster" + + val credentials = new AWSCredentials { + override def getAWSAccessKeyId: String = awsAccessKeyId + override def getAWSSecretKey: String = awsSecretKey + } + val ecsAsyncClient = new AmazonECSAsyncClient(credentials) + + override def execute: Future[BackendJobExecutionResponse] = { + + val commandOverride = new ContainerOverride().withName("simple-app").withCommand(jobDescriptor.call.instantiateCommandLine(Map.empty, OnlyPureFunctions, identity).get) + + val runRequest: RunTaskRequest = new RunTaskRequest() + .withCluster(clusterName) + .withCount(1) + .withTaskDefinition("ubuntuTask:1") + .withOverrides(new TaskOverride().withContainerOverrides(commandOverride)) + + val submitResultHandler = new AwsSdkAsyncHandler[RunTaskRequest, RunTaskResult]() + val _ = ecsAsyncClient.runTaskAsync(runRequest, submitResultHandler) + + submitResultHandler.future map { + case AwsSdkAsyncResult(_, result) => + 
log.info("AWS submission completed:\n{}", result.toString) + val taskArn= result.getTasks.asScala.head.getTaskArn + val taskDescription = waitUntilDone(taskArn) + + log.info("AWS task completed!\n{}", taskDescription.toString) + SucceededResponse(jobDescriptor.key, Option(0), Map.empty, None, Seq.empty) + } + } + + private def waitUntilDone(taskArn: String): Task = { + val describeTasksRequest = new DescribeTasksRequest().withCluster(clusterName).withTasks(List(taskArn).asJava) + + val resultHandler = new AwsSdkAsyncHandler[DescribeTasksRequest, DescribeTasksResult]() + val _ = ecsAsyncClient.describeTasksAsync(describeTasksRequest, resultHandler) + + val desribedTasks = Await.result(resultHandler.future, Duration.Inf) + val taskDescription = desribedTasks.result.getTasks.asScala.head + if (taskDescription.getLastStatus == DesiredStatus.STOPPED.toString) { + taskDescription + } else { + Thread.sleep(200) + waitUntilDone(taskArn) + } + } +} +``` + + diff --git a/README.md b/README.md index 1800278e9..ca5c14576 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [Workflow Submission](#workflow-submission) * [Database](#database) * [SIGINT abort handler](#sigint-abort-handler) +* [Security](#security) * [Backends](#backends) * [Backend Filesystems](#backend-filesystems) * [Shared Local Filesystem](#shared-local-filesystem) @@ -35,6 +36,7 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [Caching configuration](#caching-configuration) * [Docker](#docker) * [CPU, Memory and Disk](#cpu-memory-and-disk) + * [Native Specifications](#native-specifications) * [Spark Backend](#spark-backend) * [Configuring Spark Project](#configuring-spark-project) * [Configuring Spark Master and Deploy Mode](#configuring-spark-master-and-deploy-mode) @@ -63,6 +65,12 @@ A [Workflow Management System](https://en.wikipedia.org/wiki/Workflow_management * [Logging](#logging) 
* [Workflow Options](#workflow-options) * [Call Caching](#call-caching) + * [Configuring Call Caching](#configuring-call-caching) + * [Call Caching Workflow Options](#call-caching-workflow-options) + * [Local Filesystem Options](#local-filesystem-options) +* [Imports](#imports) +* [Sub Workflows](#sub-workflows) +* [Meta blocks](#meta-blocks) * [REST API](#rest-api) * [REST API Versions](#rest-api-versions) * [POST /api/workflows/:version](#post-apiworkflowsversion) @@ -124,23 +132,29 @@ See the [migration document](MIGRATION.md) for more details. Run the JAR file with no arguments to get the usage message: ``` + + $ java -jar cromwell.jar java -jar cromwell.jar Actions: -run <WDL file> [<JSON inputs file> [<JSON workflow options> - [<OUTPUT metadata file>]]] +run <WDL file> [<JSON inputs file>] [<JSON workflow options>] + [<OUTPUT metadata file>] [<Directory of WDL files>] Given a WDL file and JSON file containing the value of the workflow inputs, this will run the workflow locally and print out the outputs in JSON format. The workflow options file specifies some runtime configuration for the workflow (see README for details). The workflow metadata - output is an optional file path to output the metadata. + output is an optional file path to output the metadata. The + directory of WDL files is optional. However, it is required + if the primary workflow imports workflows that are outside + of the root directory of the Cromwell project. + + Use a single dash ("-") to skip optional files. Ex: - run noinputs.wdl - - metadata.json + run noinputs.wdl - - metadata.json - -server + server Starts a web server on port 8000. See the web server documentation for more details about the API endpoints. @@ -232,6 +246,39 @@ $ cat my_wf.metadata.json } ``` +The fifth, optional parameter to the 'run' subcommand is a zip file which contains WDL source files. This zip file can be passed +and your primary workflow can import any WDLs from that collection and re-use those tasks. 
+ +For example, consider you have a directory of WDL files: +``` +my_WDLs +└──cgrep.wdl +└──ps.wdl +└──wc.wdl +``` + +If you zip that directory to my_WDLs.zip, you have the option to pass it in as the last parameter in your run command +and be able to reference these WDLs as imports in your primary WDL. For example, your primary WDL can look like this: +``` +import "ps.wdl" as ps +import "cgrep.wdl" +import "wc.wdl" as wordCount + +workflow threestep { + +call ps.ps as getStatus +call cgrep.cgrep { input: str = getStatus.x } +call wordCount { input: str = ... } + +} + +``` +The command to run this WDL, without needing any inputs, workflow options or metadata files would look like: + +``` +$ java -jar cromwell.jar run threestep.wdl - - - /path/to/my_WDLs.zip +``` + ## server Start a server on port 8000, the API for the server is described in the [REST API](#rest-api) section. @@ -292,17 +339,14 @@ Then, edit the configuration file `database` stanza, as follows: ``` database { - config = main.mysql - - main { - mysql { - db.url = "jdbc:mysql://localhost:3306/cromwell" - db.user = "root" - db.password = "" - db.driver = "com.mysql.jdbc.Driver" - db.connectionTimeout = 5000 # NOTE: The default 1000ms is often too short for production mysql use - driver = "slick.driver.MySQLDriver$" - } + + driver = "slick.driver.MySQLDriver$" + db { + driver = "com.mysql.jdbc.Driver" + url = "jdbc:mysql://host/cromwell" + user = "user" + password = "pass" + connectionTimeout = 5000 } test { @@ -323,6 +367,14 @@ system { Or, via `-Dsystem.abort-jobs-on-terminate=true` command line option. +By default, this value is false when running `java -jar cromwell.jar server`, and true when running `java -jar cromwell.jar run `. + +# Security + + - Cromwell is NOT on its own a security appliance! + - Only YOU are responsible for your own security! 
+ - Some recommendations and suggestions on security can be found in the [SecurityRecommendations.md](SecurityRecommendations.md) document + # Backends A backend represents a way to run the user's command specified in the `task` section. Cromwell allows for backends conforming to @@ -889,6 +941,18 @@ This backend supports CPU, memory and disk size configuration through the use of It they are not set, HtCondor backend will use default values. +### Native Specifications +The use of runtime attribute 'nativeSpecs' allows the user to attach custom HtCondor configuration to tasks. +An example of this is when there is a need to work with 'requirements' or 'rank' configuration. + +``` +"runtimeAttributes": { + "nativeSpecs": ["requirements = Arch == \"INTEL\"", "rank = Memory >= 64"] +} +``` + +The nativeSpecs attribute needs to be specified as an array of strings to work. + ## Spark Backend This backend adds support for execution of spark jobs in a workflow using the existing wdl format. @@ -1053,6 +1117,7 @@ backend { config { project = "my-project" root = "gs://my-bucket" + genomics-api-queries-per-100-seconds = 1000 . . . @@ -1062,6 +1127,8 @@ backend { } ``` +If your project has API quotas other than the defaults, set the `genomics-api-queries-per-100-seconds` value to be the lesser of the `Queries per 100 seconds per user` and `Queries per 100 seconds` quotas. This value will be used to help tune Cromwell's rate of interaction with JES. + ### Configuring Authentication The `google` stanza in the Cromwell configuration file defines how to authenticate to Google. There are four different @@ -1139,6 +1206,8 @@ Creating the account will cause the JSON file to be downloaded. The structure o Most importantly, the value of the `client_email` field should go into the `service-account-id` field in the configuration (see below). The `private_key` portion needs to be pulled into its own file (e.g. `my-key.pem`). The `\n`s in the string need to be converted to newline characters. 
+While technically not part of Service Account authorization mode, one can also override the default service account that the compute VM is started with via the configuration option `JES.config.genomics.compute-service-account` or through the workflow options parameter `google_compute_service_account`. It's important that this service account, and the service account specified in `JES.config.genomics.auth` can both read/write the location specified by `JES.config.root` + #### Refresh Token A **refresh_token** field must be specified in the [workflow options](#workflow-options) when submitting the job. Omitting this field will cause the workflow to fail. @@ -1509,7 +1578,8 @@ Valid keys and their meanings: * The default is `NoNewCalls` but this can be changed using the `workflow-options.workflow-failure-mode` configuration option. * **backend** - Override the default backend specified in the Cromwell configuration for this workflow only. * JES Backend Only - * **jes_gcs_root** - (JES backend only) Specifies where outputs of the workflow will be written. Expects this to be a GCS URL (e.g. `gs://my-bucket/workflows`). If this is not set, this defaults to the value within `backend.jes.root` in the [configuration](#configuring-cromwell). + * **jes_gcs_root** - (JES backend only) Specifies where outputs of the workflow will be written. Expects this to be a GCS URL (e.g. `gs://my-bucket/workflows`). If this is not set, this defaults to the value within `backend.jes.config.root` in the [configuration](#configuring-cromwell). + * **google_compute_service_account** - (JES backend only) Specifies an alternate service account to use on the compute instance (e.g. my-new-svcacct@my-google-project.iam.gserviceaccount.com). If this is not set, this defaults to the value within `backend.jes.config.genomics.compute-service-account` in the [configuration](#configuring-cromwell) if specified or `default` otherwise. 
* **google_project** - (JES backend only) Specifies which google project to execute this workflow. * **refresh_token** - (JES backend only) Only used if `localizeWithRefreshToken` is specified in the [configuration file](#configuring-cromwell). * **auth_bucket** - (JES backend only) defaults to the the value in **jes_gcs_root**. This should represent a GCS URL that only Cromwell can write to. The Cromwell account is determined by the `google.authScheme` (and the corresponding `google.userAuth` and `google.serviceAuth`) @@ -1528,16 +1598,20 @@ Cromwell's call cache is maintained in its database. For best mileage with call > **Note:** If call caching is enabled, be careful not to change the contents of the output directory for any previously run job. Doing so might cause cache hits in Cromwell to copy over modified data and Cromwell currently does not check that the contents of the output directory changed. +## Configuring Call Caching To enable Call Caching, add the following to your Cromwell [configuration](#configuring-cromwell): ``` call-caching { enabled = true + invalidate-bad-cache-results = true } ``` When `call-caching.enabled=true` (default: `false`), Cromwell will be able to to copy results from previously run jobs (when appropriate). +When `invalidate-bad-cache-results=true` (default: `true`), Cromwell will invalidate any cache results which fail to copy during a cache-hit. This is usually desired but might be unwanted if a cache might fail to copy for external reasons, such as a difference in user authentication. +## Call Caching Workflow Options Cromwell also accepts two [workflow option](#workflow-options) related to call caching: * If call caching is enabled, but one wishes to run a workflow but not add any of the calls into the call cache when they finish, the `write_to_cache` option can be set to `false`. This value defaults to `true`. 
@@ -1545,6 +1619,508 @@ Cromwell also accepts two [workflow option](#workflow-options) related to call c > **Note:** If call caching is disabled, the to workflow options `read_from_cache` and `write_to_cache` will be ignored and the options will be treated as though they were 'false'. +## Local Filesystem Options +When running a job on the Config (Shared Filesystem) backend, Cromwell provides some additional options in the backend's config section: + +``` + config { + ... + filesystems { + ... + local { + ... + caching { + # When copying a cached result, what type of file duplication should occur. Attempted in the order listed below: + duplication-strategy: [ + "hard-link", "soft-link", "copy" + ] + + # Possible values: file, path + # "file" will compute an md5 hash of the file content. + # "path" will compute an md5 hash of the file path. This strategy will only be effective if the duplication-strategy (above) is set to "soft-link", + # in order to allow for the original file path to be hashed. + # Default: file + hashing-strategy: "file" + + # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash. + # If false or the md5 does not exist, will proceed with the above-defined hashing strategy. + # Default: false + check-sibling-md5: false + } + } + } + } +``` +# Imports + +Import statements inside of a WDL file are supported by Cromwell when running in Server mode as well as Single Workflow Runner Mode. + +In Single Workflow Runner Mode, you pass in a zip file which includes the WDL files referenced by the import statements. Cromwell requires the zip file to be passed in as a command line argument, as explained by the section [run](#run). 
+ +For example, given a workflow `wf.wdl` and an imports zip `WdlImports.zip`, a sample command would be: +``` +java -jar cromwell.jar run wf.wdl wf.inputs - - WdlImports.zip +``` + +In Server Mode, you pass in a zip file using the parameter `wdlDependencies` via the [POST /api/workflows/:version](#post-apiworkflowsversion) endpoint. + + +# Sub Workflows + +WDL allows the execution of an entire workflow as a step in a larger workflow (see WDL SPEC for more details), which is what will be referred to as a sub workflow going forward. +Cromwell supports execution of such workflows. Note that sub workflows can themselves contain sub workflows, etc... There is no limitation as to how deeply workflows can be nested. + +## Execution + +Sub workflows are executed exactly as a task would be. +*This means that if another call depends on an output of a sub workflow, this call will run when the whole sub workflow completes (successfully).* +For example, in the following case: + +`main.wdl` +``` +import "sub_wdl.wdl" as sub + +workflow main_workflow { + +  call sub.hello_and_goodbye { input: hello_and_goodbye_input = "sub world" } + +  # call myTask { input: hello_and_goodbye.hello_output } + +  output { +    String main_output = hello_and_goodbye.hello_output + } +} +``` + +`sub_wdl.wdl` +``` +task hello { + String addressee + command { + echo "Hello ${addressee}!" + } + runtime { + docker: "ubuntu:latest" + } + output { + String salutation = read_string(stdout()) + } +} + +task goodbye { + String addressee + command { + echo "Goodbye ${addressee}!" 
+ } + runtime { + docker: "ubuntu:latest" + } + output { + String salutation = read_string(stdout()) + } +} + +workflow hello_and_goodbye { + String hello_and_goodbye_input + + call hello {input: addressee = hello_and_goodbye_input } + call goodbye {input: addressee = hello_and_goodbye_input } + + output { + String hello_output = hello.salutation + String goodbye_output = goodbye.salutation + } +} +``` + +`myTask` will start only when hello_and_goodbye completes (which means all of its calls are done), even though `myTask` only needs the output of hello in the hello_and_goodbye sub workflow. +If hello_and_goodbye fails, then `myTask` won't be executed. +Only workflow outputs are visible outside a workflow, which means that references to outputs produced by a sub workflow will only be valid if those outputs are exposed in the workflow output section. + +Sub workflows are executed in the context of a main workflow, which means that operations that are normally executed once per workflow (set up, clean up, outputs copying, log copying, etc...) +will NOT be re-executed for each sub workflow. For instance if a resource is created during workflow initialization, sub workflows will need to share this same resource. +Workflow outputs will be copied for the main root workflow but not for intermediate sub workflows. + +Restarts, aborts, and call-caching work exactly as they would with tasks. +All tasks run by a sub workflow are eligible for call caching under the same rules as any other task. +However, workflows themselves are not cached as such. Which means that running the exact same workflow twice with call caching on will trigger each task to cache individually, +but not the workflow itself. + +The root path for sub workflow execution files (scripts, output files, logs) will be under the parent workflow call directory. 
+For example, the execution directory for the above main workflow would look like the following: + +``` +cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/ <- main workflow id +└── call-hello_and_goodbye <- call directory for call hello_and_goodbye in the main workflow + └── hello_and_goodbye <- name of the sub workflow + └── a6365f91-c807-465a-9186-a5d3da98fe11 <- sub workflow id + ├── call-goodbye + │   └── execution + │   ├── rc + │   ├── script + │   ├── script.background + │   ├── script.submit + │   ├── stderr + │   ├── stderr.background + │   ├── stdout + │   └── stdout.background + └── call-hello + └── execution + ├── rc + ├── script + ├── script.background + ├── script.submit + ├── stderr + ├── stderr.background + ├── stdout + └── stdout.background + +``` + +## Metadata +Each sub workflow will have its own workflow ID. This ID will appear in the metadata of the parent workflow, in the call section corresponding to the sub workflow, under the "subWorkflowId" attribute. +For example, querying the `main_workflow` metadata above (minus the `myTask` call) , could result in something like this: + +`GET /api/workflows/v2/1d919bd4-d046-43b0-9918-9964509689dd/metadata` + +``` +{ + "workflowName": "main_workflow", + "submittedFiles": { + "inputs": "{}", + "workflow": "import \"sub_wdl.wdl\" as sub\n\nworkflow main_workflow {\n\n call sub.hello_and_goodbye { input: hello_and_goodbye_input = \"sub world\" }\n \n # call myTask { input: hello_and_goodbye.hello_output }\n \n output {\n String main_output = hello_and_goodbye.hello_output\n }\n}", + "options": "{\n\n}" + }, + "calls": { + "main_workflow.hello_and_goodbye": [ + { + "executionStatus": "Done", + "shardIndex": -1, + "outputs": { + "goodbye_output": "Goodbye sub world!", + "hello_output": "Hello sub world!" 
+ }, + "inputs": { + "hello_and_goodbye_input": "sub world" + }, + "end": "2016-11-17T14:13:41.117-05:00", + "attempt": 1, + "start": "2016-11-17T14:13:39.236-05:00", + "subWorkflowId": "a6365f91-c807-465a-9186-a5d3da98fe11" + } + ] + }, + "outputs": { + "main_output": "Hello sub world!" + }, + "workflowRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd", + "id": "1d919bd4-d046-43b0-9918-9964509689dd", + "inputs": {}, + "submission": "2016-11-17T14:13:39.104-05:00", + "status": "Succeeded", + "end": "2016-11-17T14:13:41.120-05:00", + "start": "2016-11-17T14:13:39.204-05:00" +} +``` + +The sub workflow ID can be queried separately: + +`GET /api/workflows/v2/a6365f91-c807-465a-9186-a5d3da98fe11/metadata` + +``` +{ + "workflowName": "hello_and_goodbye", + "calls": { + "sub.hello_and_goodbye.hello": [ + { + "executionStatus": "Done", + "stdout": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-hello/execution/stdout", + "shardIndex": -1, + "outputs": { + "salutation": "Hello sub world!" 
+ }, + "runtimeAttributes": { + "docker": "ubuntu:latest", + "failOnStderr": false, + "continueOnReturnCode": "0" + }, + "cache": { + "allowResultReuse": true + }, + "Effective call caching mode": "CallCachingOff", + "inputs": { + "addressee": "sub world" + }, + "returnCode": 0, + "jobId": "49830", + "backend": "Local", + "end": "2016-11-17T14:13:40.712-05:00", + "stderr": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-hello/execution/stderr", + "callRoot": "/cromwell/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-hello", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "Pending", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, + { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "RequestingExecutionToken", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, + { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "PreparingJob", + "endTime": "2016-11-17T14:13:39.243-05:00" + }, + { + "startTime": "2016-11-17T14:13:39.243-05:00", + "description": "RunningJob", + "endTime": "2016-11-17T14:13:40.704-05:00" + }, + { + "startTime": "2016-11-17T14:13:40.704-05:00", + "description": "UpdatingJobStore", + "endTime": "2016-11-17T14:13:40.712-05:00" + } + ], + "start": "2016-11-17T14:13:39.239-05:00" + } + ], + "sub.hello_and_goodbye.goodbye": [ + { + "executionStatus": "Done", + "stdout": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-goodbye/execution/stdout", + "shardIndex": -1, + "outputs": { + "salutation": "Goodbye sub world!" 
+ }, + "runtimeAttributes": { + "docker": "ubuntu:latest", + "failOnStderr": false, + "continueOnReturnCode": "0" + }, + "cache": { + "allowResultReuse": true + }, + "Effective call caching mode": "CallCachingOff", + "inputs": { + "addressee": "sub world" + }, + "returnCode": 0, + "jobId": "49831", + "backend": "Local", + "end": "2016-11-17T14:13:41.115-05:00", + "stderr": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-goodbye/execution/stderr", + "callRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-goodbye", + "attempt": 1, + "executionEvents": [ + { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "Pending", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, + { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "RequestingExecutionToken", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, + { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "PreparingJob", + "endTime": "2016-11-17T14:13:39.243-05:00" + }, + { + "startTime": "2016-11-17T14:13:39.243-05:00", + "description": "RunningJob", + "endTime": "2016-11-17T14:13:41.112-05:00" + }, + { + "startTime": "2016-11-17T14:13:41.112-05:00", + "description": "UpdatingJobStore", + "endTime": "2016-11-17T14:13:41.115-05:00" + } + ], + "start": "2016-11-17T14:13:39.239-05:00" + } + ] + }, + "outputs": { + "goodbye_output": "Goodbye sub world!", + "hello_output": "Hello sub world!" 
+ }, + "workflowRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11", + "id": "a6365f91-c807-465a-9186-a5d3da98fe11", + "inputs": { + "hello_and_goodbye_input": "sub world" + }, + "status": "Succeeded", + "parentWorkflowId": "1d919bd4-d046-43b0-9918-9964509689dd", + "end": "2016-11-17T14:13:41.116-05:00", + "start": "2016-11-17T14:13:39.236-05:00" +} +``` + +It's also possible to set the URL query parameter `expandSubWorkflows` to `true` to automatically include sub workflows metadata (`false` by default). + +`GET api/workflows/v2/1d919bd4-d046-43b0-9918-9964509689dd/metadata?expandSubWorkflows=true` + +``` +{ + "workflowName": "main_workflow", + "submittedFiles": { + "inputs": "{}", + "workflow": "import \"sub_wdl.wdl\" as sub\n\nworkflow main_workflow {\n\n call sub.hello_and_goodbye { input: hello_and_goodbye_input = \"sub world\" }\n \n # call myTask { input: hello_and_goodbye.hello_output }\n \n output {\n String main_output = hello_and_goodbye.hello_output\n }\n}", + "options": "{\n\n}" + }, + "calls": { + "main_workflow.hello_and_goodbye": [{ + "executionStatus": "Done", + "subWorkflowMetadata": { + "workflowName": "hello_and_goodbye", + "calls": { + "sub.hello_and_goodbye.hello": [{ + "executionStatus": "Done", + "stdout": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-hello/execution/stdout", + "shardIndex": -1, + "outputs": { + "salutation": "Hello sub world!" 
+ }, + "runtimeAttributes": { + "docker": "ubuntu:latest", + "failOnStderr": false, + "continueOnReturnCode": "0" + }, + "cache": { + "allowResultReuse": true + }, + "Effective call caching mode": "CallCachingOff", + "inputs": { + "addressee": "sub world" + }, + "returnCode": 0, + "jobId": "49830", + "backend": "Local", + "end": "2016-11-17T14:13:40.712-05:00", + "stderr": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-hello/execution/stderr", + "callRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-hello", + "attempt": 1, + "executionEvents": [{ + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "Pending", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "RequestingExecutionToken", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "PreparingJob", + "endTime": "2016-11-17T14:13:39.243-05:00" + }, { + "startTime": "2016-11-17T14:13:39.243-05:00", + "description": "RunningJob", + "endTime": "2016-11-17T14:13:40.704-05:00" + }, { + "startTime": "2016-11-17T14:13:40.704-05:00", + "description": "UpdatingJobStore", + "endTime": "2016-11-17T14:13:40.712-05:00" + }], + "start": "2016-11-17T14:13:39.239-05:00" + }], + "sub.hello_and_goodbye.goodbye": [{ + "executionStatus": "Done", + "stdout": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-goodbye/execution/stdout", + "shardIndex": -1, + "outputs": { + "salutation": "Goodbye sub world!"
+ }, + "runtimeAttributes": { + "docker": "ubuntu:latest", + "failOnStderr": false, + "continueOnReturnCode": "0" + }, + "cache": { + "allowResultReuse": true + }, + "Effective call caching mode": "CallCachingOff", + "inputs": { + "addressee": "sub world" + }, + "returnCode": 0, + "jobId": "49831", + "backend": "Local", + "end": "2016-11-17T14:13:41.115-05:00", + "stderr": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-goodbye/execution/stderr", + "callRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11/call-goodbye", + "attempt": 1, + "executionEvents": [{ + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "Pending", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "RequestingExecutionToken", + "endTime": "2016-11-17T14:13:39.240-05:00" + }, { + "startTime": "2016-11-17T14:13:39.240-05:00", + "description": "PreparingJob", + "endTime": "2016-11-17T14:13:39.243-05:00" + }, { + "startTime": "2016-11-17T14:13:39.243-05:00", + "description": "RunningJob", + "endTime": "2016-11-17T14:13:41.112-05:00" + }, { + "startTime": "2016-11-17T14:13:41.112-05:00", + "description": "UpdatingJobStore", + "endTime": "2016-11-17T14:13:41.115-05:00" + }], + "start": "2016-11-17T14:13:39.239-05:00" + }] + }, + "outputs": { + "goodbye_output": "Goodbye sub world!", + "hello_output": "Hello sub world!" 
+ }, + "workflowRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd/call-hello_and_goodbye/hello_and_goodbye/a6365f91-c807-465a-9186-a5d3da98fe11", + "id": "a6365f91-c807-465a-9186-a5d3da98fe11", + "inputs": { + "hello_and_goodbye_input": "sub world" + }, + "status": "Succeeded", + "parentWorkflowId": "1d919bd4-d046-43b0-9918-9964509689dd", + "end": "2016-11-17T14:13:41.116-05:00", + "start": "2016-11-17T14:13:39.236-05:00" + }, + "shardIndex": -1, + "outputs": { + "goodbye_output": "Goodbye sub world!", + "hello_output": "Hello sub world!" + }, + "inputs": { + "hello_and_goodbye_input": "sub world" + }, + "end": "2016-11-17T14:13:41.117-05:00", + "attempt": 1, + "start": "2016-11-17T14:13:39.236-05:00" + }] + }, + "outputs": { + "main_output": "Hello sub world!" + }, + "workflowRoot": "/cromwell-executions/main_workflow/1d919bd4-d046-43b0-9918-9964509689dd", + "id": "1d919bd4-d046-43b0-9918-9964509689dd", + "inputs": { + + }, + "submission": "2016-11-17T14:13:39.104-05:00", + "status": "Succeeded", + "end": "2016-11-17T14:13:41.120-05:00", + "start": "2016-11-17T14:13:39.204-05:00" +} +``` + # REST API The `server` subcommand on the executable JAR will start an HTTP server which can accept WDL files to run as well as check status and output of existing workflows. @@ -1561,7 +2137,18 @@ This endpoint accepts a POST request with a `multipart/form-data` encoded body. * `wdlSource` - *Required* Contains the WDL file to submit for execution. * `workflowInputs` - *Optional* JSON file containing the inputs. A skeleton file can be generated from [wdltool](https://github.com/broadinstitute/wdltool) using the "inputs" subcommand. +* `workflowInputs_2` - *Optional* JSON file containing the inputs. +* `workflowInputs_3` - *Optional* JSON file containing the inputs. +* `workflowInputs_4` - *Optional* JSON file containing the inputs. +* `workflowInputs_5` - *Optional* JSON file containing the inputs. 
* `workflowOptions` - *Optional* JSON file containing options for this workflow execution. See the [run](#run) CLI sub-command for some more information about this. +* `wdlDependencies` - *Optional* ZIP file containing WDL files that are used to resolve import statements. + +Regarding the workflowInputs parameter, in case of key conflicts between multiple input JSON files, higher values of x in workflowInputs_x override lower values. For example, an input specified in workflowInputs_3 will override an input with the same name in workflowInputs or workflowInputs_2. +Similarly, an input key specified in workflowInputs_5 will override an identical input key in any other input file. + +Additionally, although Swagger has a limit of 5 JSON input files, the REST endpoint itself can accept an unlimited number of JSON input files. + cURL: diff --git a/SecurityRecommendations.md b/SecurityRecommendations.md new file mode 100644 index 000000000..3d980e582 --- /dev/null +++ b/SecurityRecommendations.md @@ -0,0 +1,51 @@ +Security +======== + + +* [Firecloud](#firecloud) +* [Security by sysadmin](#security) + * [Multi-tenant](#multi-tenant) + + +# Firecloud + +TODO + +# Security by sysadmin +__Warning!__ + +__This section is community-contributed. It is intended as helpful guidance only, and is not endorsed by the Broad Institute.__ + +Cromwell running in server mode accepts all connections on the configured webservice port. The simplest way to restrict access is by putting an authenticating proxy server in between users and the cromwell server: + 1. Configure a firewall rule on the cromwell server host to deny access to the webservice port (e.g. 8000) from all addresses except a secure proxy host. + 1. Configure `<your proxy server>` on the proxy host with `<your authentication method>`, to proxy authenticated traffic from the world to the cromwell server.
Using Apache `httpd` web server for example with basic htpasswd file-based authentication, the configuration might look something like: + + ```Apache + <Location /cromwell> + Order deny,allow + Allow from all + AuthType Basic + AuthName "Password Required" + AuthUserFile /path/to/my/htpasswdfile + Require user someone someoneelse + ProxyPass http://101.101.234.567:8000 # address of cromwell server web service + </Location> +``` + + 1. That's it. Users now hit `http://my.proxy.org/cromwell` with authenticated requests, and they're forwarded to port 8000 on the cromwell server host. + +## Multi-tenant +The above scheme extends easily to multiple cromwell instances, for use by different groups within an organization for example. If the instances are running on the same host then each instance should be run as its own dedicated service account user, e.g. `cromwell1`, `cromwell2` etc. so that processes running under one cromwell instance cannot access the files of another; different webservice ports must also be configured. If persistent database storage is being used then each instance should be configured with its own database and database user.
The proxy configuration above is extended simply by adding another `Location`: + +```Apache + <Location /cromwell2> + Order deny,allow + Allow from all + AuthType Basic + AuthName "Password Required" + AuthUserFile /path/to/my/htpasswdfile1 + Require user stillanotherperson andanother + ProxyPass http://101.101.234.567:8001 + </Location> +``` + diff --git a/backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala b/backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala index 11a95df39..f1bf38386 100644 --- a/backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala +++ b/backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala @@ -3,7 +3,7 @@ package cromwell.backend import akka.actor.{Actor, ActorLogging} import akka.event.LoggingReceive import cromwell.backend.BackendCacheHitCopyingActor.CopyOutputsCommand -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse, FailedNonRetryableResponse} +import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse, JobFailedNonRetryableResponse} import cromwell.backend.BackendLifecycleActor._ import cromwell.core.simpleton.WdlValueSimpleton @@ -29,6 +29,6 @@ trait BackendCacheHitCopyingActor extends Actor with ActorLogging with BackendJo def abort(): Unit = log.warning("{}: Abort not supported during cache hit copying", jobTag) private def cachingFailed(t: Throwable) = { - FailedNonRetryableResponse(jobKey = jobDescriptor.key, throwable = t, returnCode = None) + JobFailedNonRetryableResponse(jobKey = jobDescriptor.key, throwable = t, returnCode = None) } } diff --git a/backend/src/main/scala/cromwell/backend/BackendJobBreadCrumb.scala b/backend/src/main/scala/cromwell/backend/BackendJobBreadCrumb.scala new file mode 100644 index 000000000..1dbc9ca50 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/BackendJobBreadCrumb.scala @@ -0,0 +1,14 @@ +package cromwell.backend + +import java.nio.file.Path + +import
cromwell.backend.io.JobPaths +import cromwell.core.{JobKey, WorkflowId} +import wdl4s.Workflow + +case class BackendJobBreadCrumb(workflow: Workflow, id: WorkflowId, jobKey: JobKey) { + def toPath(root: Path): Path = { + val workflowPart = root.resolve(workflow.unqualifiedName).resolve(id.toString) + JobPaths.callPathBuilder(workflowPart, jobKey) + } +} diff --git a/backend/src/main/scala/cromwell/backend/BackendJobExecutionActor.scala b/backend/src/main/scala/cromwell/backend/BackendJobExecutionActor.scala index 60cdc1e02..816f43da9 100644 --- a/backend/src/main/scala/cromwell/backend/BackendJobExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/BackendJobExecutionActor.scala @@ -1,10 +1,13 @@ package cromwell.backend +import java.nio.file.Path + import akka.actor.ActorLogging import akka.event.LoggingReceive import cromwell.backend.BackendJobExecutionActor._ import cromwell.backend.BackendLifecycleActor._ -import cromwell.core.{ExecutionEvent, JobOutputs} +import cromwell.backend.wdl.OutputEvaluator +import cromwell.core.{CallOutputs, ExecutionEvent, JobKey} import wdl4s.expression.WdlStandardLibraryFunctions import wdl4s.values.WdlValue @@ -21,12 +24,12 @@ object BackendJobExecutionActor { // Responses sealed trait BackendJobExecutionActorResponse extends BackendWorkflowLifecycleActorResponse - sealed trait BackendJobExecutionResponse extends BackendJobExecutionActorResponse { def jobKey: BackendJobDescriptorKey } - case class SucceededResponse(jobKey: BackendJobDescriptorKey, returnCode: Option[Int], jobOutputs: JobOutputs, jobDetritusFiles: Option[Map[String, String]], executionEvents: Seq[ExecutionEvent]) extends BackendJobExecutionResponse + sealed trait BackendJobExecutionResponse extends BackendJobExecutionActorResponse { def jobKey: JobKey } + case class JobSucceededResponse(jobKey: BackendJobDescriptorKey, returnCode: Option[Int], jobOutputs: CallOutputs, jobDetritusFiles: Option[Map[String, Path]], executionEvents: Seq[ExecutionEvent]) 
extends BackendJobExecutionResponse case class AbortedResponse(jobKey: BackendJobDescriptorKey) extends BackendJobExecutionResponse sealed trait BackendJobFailedResponse extends BackendJobExecutionResponse { def throwable: Throwable; def returnCode: Option[Int] } - case class FailedNonRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) extends BackendJobFailedResponse - case class FailedRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) extends BackendJobFailedResponse + case class JobFailedNonRetryableResponse(jobKey: JobKey, throwable: Throwable, returnCode: Option[Int]) extends BackendJobFailedResponse + case class JobFailedRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) extends BackendJobFailedResponse } /** @@ -45,7 +48,7 @@ trait BackendJobExecutionActor extends BackendJobLifecycleActor with ActorLoggin // We need this for receive because we can't do `onFailure = ExecutionFailure` directly - because BackendJobDescriptor =/= BackendJobDescriptorKey private def executionFailed = (t: Throwable) => - FailedNonRetryableResponse(jobKey = jobDescriptor.key, throwable = t, returnCode = None) + JobFailedNonRetryableResponse(jobKey = jobDescriptor.key, throwable = t, returnCode = None) /** * Execute a new job. 
diff --git a/backend/src/main/scala/cromwell/backend/BackendLifecycleActor.scala b/backend/src/main/scala/cromwell/backend/BackendLifecycleActor.scala index 58ef72643..a6a09cff4 100644 --- a/backend/src/main/scala/cromwell/backend/BackendLifecycleActor.scala +++ b/backend/src/main/scala/cromwell/backend/BackendLifecycleActor.scala @@ -3,7 +3,7 @@ package cromwell.backend import akka.actor.{Actor, ActorRef} import cromwell.backend.BackendLifecycleActor._ import cromwell.core.logging.{JobLogging, WorkflowLogging} -import wdl4s.Call +import wdl4s.TaskCall import scala.concurrent.{ExecutionContext, Future} import scala.util.{Failure, Success} @@ -55,7 +55,7 @@ trait BackendLifecycleActor extends Actor { trait BackendWorkflowLifecycleActor extends BackendLifecycleActor with WorkflowLogging { //For Logging and boilerplate - override lazy final val workflowId = workflowDescriptor.id + override lazy final val workflowIdForLogging = workflowDescriptor.id /** * The workflow descriptor for the workflow in which this Backend is being used @@ -65,7 +65,7 @@ trait BackendWorkflowLifecycleActor extends BackendLifecycleActor with WorkflowL /** * The subset of calls which this backend will be expected to run */ - protected def calls: Seq[Call] + protected def calls: Set[TaskCall] } trait BackendJobLifecycleActor extends BackendLifecycleActor with JobLogging { diff --git a/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala b/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala index 5a6c5d268..cb625a78b 100644 --- a/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala +++ b/backend/src/main/scala/cromwell/backend/BackendLifecycleActorFactory.scala @@ -6,17 +6,17 @@ import akka.actor.{ActorRef, Props} import com.typesafe.config.Config import cromwell.backend.callcaching.FileHashingActor import cromwell.backend.callcaching.FileHashingActor.FileHashingFunction -import cromwell.backend.io.WorkflowPaths +import 
cromwell.backend.io.WorkflowPathsWithDocker +import cromwell.core.CallOutputs import cromwell.core.JobExecutionToken.JobExecutionTokenType -import cromwell.core.{ExecutionStore, OutputStore} -import wdl4s.Call -import wdl4s.expression.WdlStandardLibraryFunctions +import wdl4s.TaskCall +import wdl4s.expression.{PureStandardLibraryFunctions, WdlStandardLibraryFunctions} trait BackendLifecycleActorFactory { def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - serviceRegistryActor: ActorRef): Option[Props] + calls: Set[TaskCall], + serviceRegistryActor: ActorRef): Option[Props] = None def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, initializationData: Option[BackendInitializationData], @@ -37,17 +37,21 @@ trait BackendLifecycleActorFactory { def backendSingletonActorProps: Option[Props] = None def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - executionStore: ExecutionStore, - outputStore: OutputStore, + calls: Set[TaskCall], + jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, initializationData: Option[BackendInitializationData]): Option[Props] = None def expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, jobKey: BackendJobDescriptorKey, - initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions + initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = PureStandardLibraryFunctions def getExecutionRootPath(workflowDescriptor: BackendWorkflowDescriptor, backendConfig: Config, initializationData: Option[BackendInitializationData]): Path = { - new WorkflowPaths(workflowDescriptor, backendConfig).executionRoot + new WorkflowPathsWithDocker(workflowDescriptor, backendConfig).executionRoot + } + + def getWorkflowExecutionRootPath(workflowDescriptor: BackendWorkflowDescriptor, backendConfig: Config, initializationData: Option[BackendInitializationData]): Path = 
{ + new WorkflowPathsWithDocker(workflowDescriptor, backendConfig).workflowRoot } def runtimeAttributeDefinitions(initializationDataOption: Option[BackendInitializationData]): Set[RuntimeAttributeDefinition] = Set.empty diff --git a/backend/src/main/scala/cromwell/backend/BackendWorkflowInitializationActor.scala b/backend/src/main/scala/cromwell/backend/BackendWorkflowInitializationActor.scala index f98234ce5..feaf5720b 100644 --- a/backend/src/main/scala/cromwell/backend/BackendWorkflowInitializationActor.scala +++ b/backend/src/main/scala/cromwell/backend/BackendWorkflowInitializationActor.scala @@ -4,13 +4,13 @@ import akka.actor.{ActorLogging, ActorRef} import akka.event.LoggingReceive import cromwell.backend.BackendLifecycleActor._ import cromwell.backend.BackendWorkflowInitializationActor._ -import cromwell.backend.wdl.OnlyPureFunctions +import wdl4s.expression.PureStandardLibraryFunctions import cromwell.core.{WorkflowMetadataKeys, WorkflowOptions} import cromwell.services.metadata.MetadataService.PutMetadataAction import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} import wdl4s.types._ import wdl4s.values.{WdlArray, WdlBoolean, WdlInteger, WdlString, WdlValue} -import wdl4s.{Call, NoLookup, Task, WdlExpression} +import wdl4s._ import scala.concurrent.Future import scala.util.{Failure, Success, Try} @@ -36,7 +36,7 @@ object BackendWorkflowInitializationActor { trait BackendWorkflowInitializationActor extends BackendWorkflowLifecycleActor with ActorLogging { val serviceRegistryActor: ActorRef - def calls: Seq[Call] + def calls: Set[TaskCall] /** * This method is meant only as a "pre-flight check" validation of runtime attribute expressions during workflow @@ -53,7 +53,7 @@ trait BackendWorkflowInitializationActor extends BackendWorkflowLifecycleActor w wdlExpressionMaybe match { case None => !valueRequired case Some(wdlExpression: WdlExpression) => - wdlExpression.evaluate(NoLookup, OnlyPureFunctions) map (_.wdlType) match { + 
wdlExpression.evaluate(NoLookup, PureStandardLibraryFunctions) map (_.wdlType) match { case Success(wdlType) => predicate(wdlType) case Failure(_) => true // If we can't evaluate it, we'll let it pass for now... } @@ -81,7 +81,7 @@ trait BackendWorkflowInitializationActor extends BackendWorkflowLifecycleActor w wdlExpressionMaybe match { case None => !valueRequired case Some(wdlExpression: WdlExpression) => - wdlExpression.evaluate(NoLookup, OnlyPureFunctions) match { + wdlExpression.evaluate(NoLookup, PureStandardLibraryFunctions) match { case Success(wdlValue) => validateValue(wdlValue) case Failure(throwable) => true // If we can't evaluate it, we'll let it pass for now... } @@ -91,6 +91,9 @@ trait BackendWorkflowInitializationActor extends BackendWorkflowLifecycleActor w protected def runtimeAttributeValidators: Map[String, Option[WdlValue] => Boolean] + // FIXME: If a workflow executes jobs using multiple backends, + // each backend will try to write its own workflow root and override any previous one. + // They should be structured differently or at least be prefixed by the backend name protected def publishWorkflowRoot(workflowRoot: String) = { serviceRegistryActor ! 
PutMetadataAction(MetadataEvent(MetadataKey(workflowDescriptor.id, None, WorkflowMetadataKeys.WorkflowRoot), MetadataValue(workflowRoot))) } diff --git a/backend/src/main/scala/cromwell/backend/OutputEvaluator.scala b/backend/src/main/scala/cromwell/backend/OutputEvaluator.scala deleted file mode 100644 index 0c6365265..000000000 --- a/backend/src/main/scala/cromwell/backend/OutputEvaluator.scala +++ /dev/null @@ -1,31 +0,0 @@ -package cromwell.backend - -import cromwell.core.JobOutput -import wdl4s._ -import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.util.TryUtil -import wdl4s.values.WdlValue - -import scala.util.{Success, Try} - -object OutputEvaluator { - def evaluateOutputs(jobDescriptor: BackendJobDescriptor, - wdlFunctions: WdlStandardLibraryFunctions, - postMapper: WdlValue => Try[WdlValue] = v => Success(v)) = { - val inputs = jobDescriptor.inputs - val evaluatedOutputs = jobDescriptor.call.task.outputs. - foldLeft(Map.empty[LocallyQualifiedName, Try[JobOutput]])((outputMap, output) => { - val currentOutputs = outputMap collect { - case (name, value) if value.isSuccess => name -> value.get.wdlValue - } - def lookup = (currentOutputs ++ inputs).apply _ - val coerced = output.requiredExpression.evaluate(lookup, wdlFunctions) flatMap output.wdlType.coerceRawValue - val jobOutput = output.name -> (coerced flatMap postMapper map JobOutput) - - outputMap + jobOutput - - }) - - TryUtil.sequenceMap(evaluatedOutputs, s"Workflow ${jobDescriptor.workflowDescriptor.id} post processing failed.") - } -} diff --git a/backend/src/main/scala/cromwell/backend/RuntimeAttributeDefinition.scala b/backend/src/main/scala/cromwell/backend/RuntimeAttributeDefinition.scala index 238309143..ae9d4b9bf 100644 --- a/backend/src/main/scala/cromwell/backend/RuntimeAttributeDefinition.scala +++ b/backend/src/main/scala/cromwell/backend/RuntimeAttributeDefinition.scala @@ -20,8 +20,8 @@ object RuntimeAttributeDefinition { def evaluateRuntimeAttributes(unevaluated: 
RuntimeAttributes, wdlFunctions: WdlStandardLibraryFunctions, - evaluatedInputs: Map[LocallyQualifiedName, WdlValue]): Try[Map[String, WdlValue]] = { - val tryInputs = evaluatedInputs map { case (x, y) => x -> Success(y) } + evaluatedInputs: Map[Declaration, WdlValue]): Try[Map[String, WdlValue]] = { + val tryInputs = evaluatedInputs map { case (x, y) => x.unqualifiedName -> Success(y) } val mapBasedLookup = buildMapBasedLookup(tryInputs) _ val mapOfTries = unevaluated.attrs mapValues { expr => expr.evaluate(mapBasedLookup, wdlFunctions) diff --git a/backend/src/main/scala/cromwell/backend/async/AsyncBackendJobExecutionActor.scala b/backend/src/main/scala/cromwell/backend/async/AsyncBackendJobExecutionActor.scala index bbbfbf82b..759127d67 100644 --- a/backend/src/main/scala/cromwell/backend/async/AsyncBackendJobExecutionActor.scala +++ b/backend/src/main/scala/cromwell/backend/async/AsyncBackendJobExecutionActor.scala @@ -2,7 +2,7 @@ package cromwell.backend.async import akka.actor.{Actor, ActorLogging, ActorRef} import cromwell.backend.BackendJobDescriptor -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, SucceededResponse, _} +import cromwell.backend.BackendJobExecutionActor._ import cromwell.backend.async.AsyncBackendJobExecutionActor._ import cromwell.core.CromwellFatalException import cromwell.core.retry.{Retry, SimpleExponentialBackoff} @@ -60,7 +60,7 @@ trait AsyncBackendJobExecutionActor { this: Actor with ActorLogging => } private def failAndStop(t: Throwable) = { - val responseBuilder = if (retryable) FailedRetryableResponse else FailedNonRetryableResponse + val responseBuilder = if (retryable) JobFailedRetryableResponse else JobFailedNonRetryableResponse completionPromise.success(responseBuilder.apply(jobDescriptor.key, t, None)) context.stop(self) } @@ -75,13 +75,13 @@ trait AsyncBackendJobExecutionActor { this: Actor with ActorLogging => context.system.scheduler.scheduleOnce(pollBackOff.backoffMillis.millis, self, 
IssuePollRequest(handle)) () case Finish(SuccessfulExecutionHandle(outputs, returnCode, jobDetritusFiles, executionEvents, resultsClonedFrom)) => - completionPromise.success(SucceededResponse(jobDescriptor.key, Some(returnCode), outputs, Option(jobDetritusFiles), executionEvents)) + completionPromise.success(JobSucceededResponse(jobDescriptor.key, Some(returnCode), outputs, Option(jobDetritusFiles), executionEvents)) context.stop(self) case Finish(FailedNonRetryableExecutionHandle(throwable, returnCode)) => - completionPromise.success(FailedNonRetryableResponse(jobDescriptor.key, throwable, returnCode)) + completionPromise.success(JobFailedNonRetryableResponse(jobDescriptor.key, throwable, returnCode)) context.stop(self) case Finish(FailedRetryableExecutionHandle(throwable, returnCode)) => - completionPromise.success(FailedRetryableResponse(jobDescriptor.key, throwable, returnCode)) + completionPromise.success(JobFailedRetryableResponse(jobDescriptor.key, throwable, returnCode)) context.stop(self) case Finish(cromwell.backend.async.AbortedExecutionHandle) => completionPromise.success(AbortedResponse(jobDescriptor.key)) diff --git a/backend/src/main/scala/cromwell/backend/async/ExecutionHandle.scala b/backend/src/main/scala/cromwell/backend/async/ExecutionHandle.scala index 88232f3b2..1e4238014 100644 --- a/backend/src/main/scala/cromwell/backend/async/ExecutionHandle.scala +++ b/backend/src/main/scala/cromwell/backend/async/ExecutionHandle.scala @@ -1,7 +1,9 @@ package cromwell.backend.async +import java.nio.file.Path + import cromwell.backend.BackendJobDescriptor -import cromwell.core.{ExecutionEvent, JobOutputs} +import cromwell.core.{ExecutionEvent, CallOutputs} /** * Trait to encapsulate whether an execution is complete and if so provide a result. 
Useful in conjunction @@ -12,7 +14,7 @@ trait ExecutionHandle { def result: ExecutionResult } -final case class SuccessfulExecutionHandle(outputs: JobOutputs, returnCode: Int, jobDetritusFiles: Map[String, String], executionEvents: Seq[ExecutionEvent], resultsClonedFrom: Option[BackendJobDescriptor] = None) extends ExecutionHandle { +final case class SuccessfulExecutionHandle(outputs: CallOutputs, returnCode: Int, jobDetritusFiles: Map[String, Path], executionEvents: Seq[ExecutionEvent], resultsClonedFrom: Option[BackendJobDescriptor] = None) extends ExecutionHandle { override val isDone = true override val result = SuccessfulExecution(outputs, returnCode, jobDetritusFiles, executionEvents, resultsClonedFrom) } diff --git a/backend/src/main/scala/cromwell/backend/async/ExecutionResult.scala b/backend/src/main/scala/cromwell/backend/async/ExecutionResult.scala index 267bea877..ff9722337 100644 --- a/backend/src/main/scala/cromwell/backend/async/ExecutionResult.scala +++ b/backend/src/main/scala/cromwell/backend/async/ExecutionResult.scala @@ -1,7 +1,9 @@ package cromwell.backend.async +import java.nio.file.Path + import cromwell.backend.BackendJobDescriptor -import cromwell.core.{ExecutionEvent, JobOutputs} +import cromwell.core.{ExecutionEvent, CallOutputs} /** * ADT representing the result of an execution of a BackendCall. @@ -11,9 +13,9 @@ sealed trait ExecutionResult /** * A successful execution with resolved outputs. 
*/ -final case class SuccessfulExecution(outputs: JobOutputs, +final case class SuccessfulExecution(outputs: CallOutputs, returnCode: Int, - jobDetritusFiles: Map[String, String], + jobDetritusFiles: Map[String, Path], executionEvents: Seq[ExecutionEvent], resultsClonedFrom: Option[BackendJobDescriptor] = None) extends ExecutionResult diff --git a/backend/src/main/scala/cromwell/backend/backend.scala b/backend/src/main/scala/cromwell/backend/backend.scala index 8ac55a347..e1addfe30 100644 --- a/backend/src/main/scala/cromwell/backend/backend.scala +++ b/backend/src/main/scala/cromwell/backend/backend.scala @@ -2,20 +2,19 @@ package cromwell.backend import com.typesafe.config.Config import cromwell.core.WorkflowOptions.WorkflowOption -import cromwell.core.{JobKey, WorkflowId, WorkflowOptions} +import cromwell.core.{CallKey, WorkflowId, WorkflowOptions} +import wdl4s._ import wdl4s.values.WdlValue -import wdl4s.{Call, NamespaceWithWorkflow, _} import scala.util.Try /** * For uniquely identifying a job which has been or will be sent to the backend. 
*/ -case class BackendJobDescriptorKey(call: Call, index: Option[Int], attempt: Int) extends JobKey { +case class BackendJobDescriptorKey(call: TaskCall, index: Option[Int], attempt: Int) extends CallKey { def scope = call private val indexString = index map { _.toString } getOrElse "NA" val tag = s"${call.fullyQualifiedName}:$indexString:$attempt" - val isShard = index.isDefined def mkTag(workflowId: WorkflowId) = s"$workflowId:$this" } @@ -25,19 +24,34 @@ case class BackendJobDescriptorKey(call: Call, index: Option[Int], attempt: Int) case class BackendJobDescriptor(workflowDescriptor: BackendWorkflowDescriptor, key: BackendJobDescriptorKey, runtimeAttributes: Map[LocallyQualifiedName, WdlValue], - inputs: Map[LocallyQualifiedName, WdlValue]) { + inputDeclarations: EvaluatedTaskInputs) { + val fullyQualifiedInputs = inputDeclarations map { case (declaration, value) => declaration.fullyQualifiedName -> value } val call = key.call override val toString = s"${key.mkTag(workflowDescriptor.id)}" } +object BackendWorkflowDescriptor { + def apply(id: WorkflowId, + workflow: Workflow, + inputs: Map[FullyQualifiedName, WdlValue], + workflowOptions: WorkflowOptions) = { + new BackendWorkflowDescriptor(id, workflow, inputs, workflowOptions, List.empty) + } +} + /** * For passing to a BackendActor construction time */ case class BackendWorkflowDescriptor(id: WorkflowId, - workflowNamespace: NamespaceWithWorkflow, + workflow: Workflow, inputs: Map[FullyQualifiedName, WdlValue], - workflowOptions: WorkflowOptions) { - override def toString: String = s"[BackendWorkflowDescriptor id=${id.shortString} workflowName=${workflowNamespace.workflow.unqualifiedName}]" + workflowOptions: WorkflowOptions, + breadCrumbs: List[BackendJobBreadCrumb]) { + + val rootWorkflow = breadCrumbs.headOption.map(_.workflow).getOrElse(workflow) + val rootWorkflowId = breadCrumbs.headOption.map(_.id).getOrElse(id) + + override def toString: String = s"[BackendWorkflowDescriptor id=${id.shortString} 
workflowName=${workflow.unqualifiedName}]" def getWorkflowOption(key: WorkflowOption) = workflowOptions.get(key).toOption } diff --git a/backend/src/main/scala/cromwell/backend/callcaching/CacheHitDuplicating.scala b/backend/src/main/scala/cromwell/backend/callcaching/CacheHitDuplicating.scala index c199de6d4..48a6d590e 100644 --- a/backend/src/main/scala/cromwell/backend/callcaching/CacheHitDuplicating.scala +++ b/backend/src/main/scala/cromwell/backend/callcaching/CacheHitDuplicating.scala @@ -4,12 +4,15 @@ import java.nio.file.Path import akka.actor.ActorRef import cromwell.backend.BackendCacheHitCopyingActor -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobSucceededResponse} import cromwell.backend.io.JobPaths -import cromwell.core.PathCopier +import cromwell.core.path.PathCopier import cromwell.core.simpleton.{WdlValueBuilder, WdlValueSimpleton} import wdl4s.values.WdlFile +import scala.language.postfixOps +import scala.util.Try + /** * Mixin implementing common functionality for a BackendCacheHitCopyingActor. * @@ -35,7 +38,7 @@ trait CacheHitDuplicating { * @param file the string version of the path * @return an absolute path to the file with potential credentials embedded within. 
*/ - protected def getPath(file: String): Path + protected def getPath(file: String): Try[Path] protected def destinationCallRootPath: Path @@ -47,9 +50,10 @@ trait CacheHitDuplicating { protected def metadataKeyValues: Map[String, Any] private def lookupSourceCallRootPath(sourceJobDetritusFiles: Map[String, String]): Path = { - sourceJobDetritusFiles.get(JobPaths.CallRootPathKey).map(getPath).getOrElse(throw new RuntimeException( - s"${JobPaths.CallRootPathKey} wasn't found for call ${jobDescriptor.call.fullyQualifiedName}") - ) + sourceJobDetritusFiles.get(JobPaths.CallRootPathKey).map(getPath).get recover { + case failure => + throw new RuntimeException(s"${JobPaths.CallRootPathKey} wasn't found for call ${jobDescriptor.call.fullyQualifiedName}", failure) + } get } /** @@ -59,27 +63,27 @@ trait CacheHitDuplicating { sourceCallRootPath: Path): Seq[WdlValueSimpleton] = { wdlValueSimpletons map { case WdlValueSimpleton(key, wdlFile: WdlFile) => - val sourcePath = getPath(wdlFile.value) + val sourcePath = getPath(wdlFile.value).get val destinationPath = PathCopier.getDestinationFilePath(sourceCallRootPath, sourcePath, destinationCallRootPath) duplicate(sourcePath, destinationPath) - WdlValueSimpleton(key, WdlFile(destinationPath.toString)) + WdlValueSimpleton(key, WdlFile(destinationPath.toUri.toString)) case wdlValueSimpleton => wdlValueSimpleton } } - private def copyDetritus(sourceJobDetritusFiles: Map[String, String]): Map[String, String] = { + private def copyDetritus(sourceJobDetritusFiles: Map[String, String]): Map[String, Path] = { val sourceKeys = sourceJobDetritusFiles.keySet val destinationKeys = destinationJobDetritusPaths.keySet val fileKeys = sourceKeys.intersect(destinationKeys).filterNot(_ == JobPaths.CallRootPathKey) val destinationJobDetritusFiles = fileKeys map { fileKey => - val sourcePath = getPath(sourceJobDetritusFiles(fileKey)) + val sourcePath = getPath(sourceJobDetritusFiles(fileKey)).get val destinationPath = 
destinationJobDetritusPaths(fileKey) duplicate(sourcePath, destinationPath) - (fileKey, destinationPath.toString) + (fileKey, destinationPath) } - destinationJobDetritusFiles.toMap + (JobPaths.CallRootPathKey -> destinationCallRootPath.toString) + destinationJobDetritusFiles.toMap + (JobPaths.CallRootPathKey -> destinationCallRootPath) } override def copyCachedOutputs(wdlValueSimpletons: Seq[WdlValueSimpleton], @@ -95,6 +99,6 @@ trait CacheHitDuplicating { import cromwell.services.metadata.MetadataService.implicits.MetadataAutoPutter serviceRegistryActor.putMetadata(jobDescriptor.workflowDescriptor.id, Option(jobDescriptor.key), metadataKeyValues) - SucceededResponse(jobDescriptor.key, returnCodeOption, destinationJobOutputs, Option(destinationJobDetritusFiles), Seq.empty) + JobSucceededResponse(jobDescriptor.key, returnCodeOption, destinationJobOutputs, Option(destinationJobDetritusFiles), Seq.empty) } } diff --git a/backend/src/main/scala/cromwell/backend/io/JobPaths.scala b/backend/src/main/scala/cromwell/backend/io/JobPaths.scala index c9a5ac120..5c3a0c9f7 100644 --- a/backend/src/main/scala/cromwell/backend/io/JobPaths.scala +++ b/backend/src/main/scala/cromwell/backend/io/JobPaths.scala @@ -2,8 +2,7 @@ package cromwell.backend.io import java.nio.file.Path -import com.typesafe.config.Config -import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} +import cromwell.core.JobKey import cromwell.services.metadata.CallMetadataKeys object JobPaths { @@ -15,65 +14,50 @@ object JobPaths { val StdErrPathKey = "stderr" val ReturnCodePathKey = "returnCode" val CallRootPathKey = "callRootPath" -} - -class JobPaths(workflowDescriptor: BackendWorkflowDescriptor, - config: Config, - jobKey: BackendJobDescriptorKey) extends WorkflowPaths(workflowDescriptor, config) { - import JobPaths._ - private def callPathBuilder(root: Path) = { - val callName = jobKey.call.fullyQualifiedName.split('.').last + def callPathBuilder(root: Path, jobKey: JobKey) = { + val 
callName = jobKey.scope.unqualifiedName val call = s"$CallPrefix-$callName" val shard = jobKey.index map { s => s"$ShardPrefix-$s" } getOrElse "" val retry = if (jobKey.attempt > 1) s"$AttemptPrefix-${jobKey.attempt}" else "" List(call, shard, retry).foldLeft(root)((path, dir) => path.resolve(dir)) } +} - def toDockerPath(path: Path): Path = { - path.toAbsolutePath match { - case p if p.startsWith(WorkflowPaths.DockerRoot) => p - case p => - /** For example: - * - * p = /abs/path/to/cromwell-executions/three-step/f00ba4/call-ps/stdout.txt - * localExecutionRoot = /abs/path/to/cromwell-executions - * subpath = three-step/f00ba4/call-ps/stdout.txt - * - * return value = /root/three-step/f00ba4/call-ps/stdout.txt - * - * TODO: this assumes that p.startsWith(localExecutionRoot) - */ - val subpath = p.subpath(executionRoot.getNameCount, p.getNameCount) - WorkflowPaths.DockerRoot.resolve(subpath) - } - } - - val callRoot = callPathBuilder(workflowRoot) - val callDockerRoot = callPathBuilder(dockerWorkflowRoot) - - val callExecutionRoot = callRoot.resolve("execution") - val callExecutionDockerRoot = callDockerRoot.resolve("execution") - - val callInputsRoot = callRoot.resolve("inputs") - - val stdout = callExecutionRoot.resolve("stdout") - val stderr = callExecutionRoot.resolve("stderr") - val script = callExecutionRoot.resolve("script") - val returnCode = callExecutionRoot.resolve("rc") +trait JobPaths { this: WorkflowPaths => + import JobPaths._ - lazy val metadataPaths: Map[String, Path] = Map( + def returnCodeFilename: String = "rc" + def stdoutFilename: String = "stdout" + def stderrFilename: String = "stderr" + def scriptFilename: String = "script" + + def jobKey: JobKey + lazy val callRoot = callPathBuilder(workflowRoot, jobKey) + lazy val callExecutionRoot = callRoot + lazy val stdout = callExecutionRoot.resolve(stdoutFilename) + lazy val stderr = callExecutionRoot.resolve(stderrFilename) + lazy val script = callExecutionRoot.resolve(scriptFilename) + lazy val 
returnCode = callExecutionRoot.resolve(returnCodeFilename) + + private lazy val commonMetadataPaths: Map[String, Path] = Map( CallMetadataKeys.CallRoot -> callRoot, CallMetadataKeys.Stdout -> stdout, CallMetadataKeys.Stderr -> stderr ) - lazy val detritusPaths: Map[String, Path] = Map( + private lazy val commonDetritusPaths: Map[String, Path] = Map( JobPaths.CallRootPathKey -> callRoot, JobPaths.ScriptPathKey -> script, JobPaths.StdoutPathKey -> stdout, JobPaths.StdErrPathKey -> stderr, JobPaths.ReturnCodePathKey -> returnCode ) + + protected lazy val customMetadataPaths: Map[String, Path] = Map.empty + protected lazy val customDetritusPaths: Map[String, Path] = Map.empty + + lazy val metadataPaths = commonMetadataPaths ++ customMetadataPaths + lazy val detritusPaths = commonDetritusPaths ++ customDetritusPaths } diff --git a/backend/src/main/scala/cromwell/backend/io/JobPathsWithDocker.scala b/backend/src/main/scala/cromwell/backend/io/JobPathsWithDocker.scala new file mode 100644 index 000000000..49c748e88 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/io/JobPathsWithDocker.scala @@ -0,0 +1,39 @@ +package cromwell.backend.io + +import java.nio.file.Path + +import com.typesafe.config.Config +import cromwell.backend.{BackendWorkflowDescriptor, BackendJobDescriptorKey} +import cromwell.core.path.PathBuilder + +class JobPathsWithDocker(val jobKey: BackendJobDescriptorKey, + workflowDescriptor: BackendWorkflowDescriptor, + config: Config, + pathBuilders: List[PathBuilder] = WorkflowPaths.DefaultPathBuilders) extends WorkflowPathsWithDocker( + workflowDescriptor, config, pathBuilders) with JobPaths { + import JobPaths._ + + override lazy val callExecutionRoot = { callRoot.resolve("execution") } + val callDockerRoot = callPathBuilder(dockerWorkflowRoot, jobKey) + val callExecutionDockerRoot = callDockerRoot.resolve("execution") + val callInputsRoot = callRoot.resolve("inputs") + + def toDockerPath(path: Path): Path = { + path.toAbsolutePath match { + case p 
if p.startsWith(WorkflowPathsWithDocker.DockerRoot) => p + case p => + /* For example: + * + * p = /abs/path/to/cromwell-executions/three-step/f00ba4/call-ps/stdout.txt + * localExecutionRoot = /abs/path/to/cromwell-executions + * subpath = three-step/f00ba4/call-ps/stdout.txt + * + * return value = /root/three-step/f00ba4/call-ps/stdout.txt + * + * TODO: this assumes that p.startsWith(localExecutionRoot) + */ + val subpath = p.subpath(executionRoot.getNameCount, p.getNameCount) + WorkflowPathsWithDocker.DockerRoot.resolve(subpath) + } + } +} \ No newline at end of file diff --git a/backend/src/main/scala/cromwell/backend/io/WorkflowPaths.scala b/backend/src/main/scala/cromwell/backend/io/WorkflowPaths.scala index 23bdae992..be959aec5 100644 --- a/backend/src/main/scala/cromwell/backend/io/WorkflowPaths.scala +++ b/backend/src/main/scala/cromwell/backend/io/WorkflowPaths.scala @@ -1,26 +1,37 @@ package cromwell.backend.io -import java.nio.file.{FileSystem, FileSystems, Path, Paths} +import java.nio.file.Path import com.typesafe.config.Config import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} -import cromwell.core.PathFactory +import cromwell.core.WorkflowOptions.FinalCallLogsDir +import cromwell.core.path.{DefaultPathBuilder, PathFactory} import net.ceedubs.ficus.Ficus._ -object WorkflowPaths{ - val DockerRoot = Paths.get("/root") -} +import scala.util.Try -class WorkflowPaths(workflowDescriptor: BackendWorkflowDescriptor, config: Config, val fileSystems: List[FileSystem] = List(FileSystems.getDefault)) extends PathFactory { - val executionRoot = Paths.get(config.as[Option[String]]("root").getOrElse("cromwell-executions")).toAbsolutePath +object WorkflowPaths { + val DefaultPathBuilders = List(DefaultPathBuilder) +} - private def workflowPathBuilder(root: Path) = { - root.resolve(workflowDescriptor.workflowNamespace.workflow.unqualifiedName) - .resolve(workflowDescriptor.id.toString) +trait WorkflowPaths extends PathFactory { + def 
workflowDescriptor: BackendWorkflowDescriptor + def config: Config + + protected lazy val executionRootString = config.as[Option[String]]("root").getOrElse("cromwell-executions") + + def getPath(url: String): Try[Path] = Try(PathFactory.buildPath(url, pathBuilders)) + + // Rebuild potential intermediate call directories in case of a sub workflow + protected def workflowPathBuilder(root: Path) = { + workflowDescriptor.breadCrumbs.foldLeft(root)((acc, breadCrumb) => { + breadCrumb.toPath(acc) + }).resolve(workflowDescriptor.workflow.unqualifiedName).resolve(workflowDescriptor.id.toString + "/") } + lazy val executionRoot = PathFactory.buildPath(executionRootString, pathBuilders).toAbsolutePath lazy val workflowRoot = workflowPathBuilder(executionRoot) - lazy val dockerWorkflowRoot = workflowPathBuilder(WorkflowPaths.DockerRoot) + lazy val finalCallLogsPath = workflowDescriptor.getWorkflowOption(FinalCallLogsDir) map getPath map { _.get } - def toJobPaths(jobKey: BackendJobDescriptorKey) = new JobPaths(workflowDescriptor, config, jobKey) + def toJobPaths(jobKey: BackendJobDescriptorKey): JobPaths } diff --git a/backend/src/main/scala/cromwell/backend/io/WorkflowPathsBackendInitializationData.scala b/backend/src/main/scala/cromwell/backend/io/WorkflowPathsBackendInitializationData.scala index 6ac8ba960..b0861d6bb 100644 --- a/backend/src/main/scala/cromwell/backend/io/WorkflowPathsBackendInitializationData.scala +++ b/backend/src/main/scala/cromwell/backend/io/WorkflowPathsBackendInitializationData.scala @@ -1,8 +1,7 @@ package cromwell.backend.io -import java.nio.file.FileSystem - import cromwell.backend.BackendInitializationData +import cromwell.core.path.PathBuilder /** * Extension of backend initialization data that also provides a `WorkflowPaths`, and by proxy its `List[FileSystem]`. 
@@ -39,7 +38,7 @@ object WorkflowPathsBackendInitializationData { BackendInitializationData.as[WorkflowPathsBackendInitializationData](initializationDataOption).workflowPaths } - def fileSystems(initializationDataOption: Option[BackendInitializationData]): List[FileSystem] = { - workflowPaths(initializationDataOption).fileSystems + def pathBuilders(initializationDataOption: Option[BackendInitializationData]): List[PathBuilder] = { + workflowPaths(initializationDataOption).pathBuilders } } diff --git a/backend/src/main/scala/cromwell/backend/io/WorkflowPathsWithDocker.scala b/backend/src/main/scala/cromwell/backend/io/WorkflowPathsWithDocker.scala new file mode 100644 index 000000000..c10e66972 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/io/WorkflowPathsWithDocker.scala @@ -0,0 +1,16 @@ +package cromwell.backend.io + +import java.nio.file.Paths + +import com.typesafe.config.Config +import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} +import cromwell.core.path.PathBuilder + +object WorkflowPathsWithDocker { + val DockerRoot = Paths.get("/root") +} + +class WorkflowPathsWithDocker(val workflowDescriptor: BackendWorkflowDescriptor, val config: Config, val pathBuilders: List[PathBuilder] = WorkflowPaths.DefaultPathBuilders) extends WorkflowPaths { + val dockerWorkflowRoot = workflowPathBuilder(WorkflowPathsWithDocker.DockerRoot) + override def toJobPaths(jobKey: BackendJobDescriptorKey): JobPaths = new JobPathsWithDocker(jobKey, workflowDescriptor, config, pathBuilders) +} \ No newline at end of file diff --git a/backend/src/main/scala/cromwell/backend/package.scala b/backend/src/main/scala/cromwell/backend/package.scala index 3bad6f61f..132fcf578 100644 --- a/backend/src/main/scala/cromwell/backend/package.scala +++ b/backend/src/main/scala/cromwell/backend/package.scala @@ -1,14 +1,6 @@ package cromwell -import wdl4s.values.WdlValue - -import scala.language.postfixOps -import scala.util.Success - package object backend { - 
implicit class AugmentedAttemptedLookupSequence(s: Seq[AttemptedLookupResult]) { - def toLookupMap: Map[String, WdlValue] = s collect { - case AttemptedLookupResult(name, Success(value)) => (name, value) - } toMap - } + /** Represents the jobKeys executed by a (potentially sub-) workflow at a given point in time */ + type JobExecutionMap = Map[BackendWorkflowDescriptor, List[BackendJobDescriptorKey]] } diff --git a/backend/src/main/scala/cromwell/backend/validation/RuntimeAttributesValidation.scala b/backend/src/main/scala/cromwell/backend/validation/RuntimeAttributesValidation.scala index 9e56c71be..1ba92527e 100644 --- a/backend/src/main/scala/cromwell/backend/validation/RuntimeAttributesValidation.scala +++ b/backend/src/main/scala/cromwell/backend/validation/RuntimeAttributesValidation.scala @@ -1,9 +1,8 @@ package cromwell.backend.validation import cats.syntax.validated._ -import cromwell.backend.wdl.OnlyPureFunctions +import wdl4s.expression.PureStandardLibraryFunctions import cromwell.backend.{MemorySize, RuntimeAttributeDefinition} -import cromwell.core._ import cromwell.core.ErrorOr._ import org.slf4j.Logger import wdl4s.WdlExpression @@ -336,7 +335,7 @@ trait RuntimeAttributesValidation[ValidatedType] { For now, if something tries to "lookup" a value, convert it to a WdlString. 
*/ val wdlStringLookup: ScopedLookupFunction = (value: String) => WdlString(value) - wdlExpression.evaluate(wdlStringLookup, OnlyPureFunctions) match { + wdlExpression.evaluate(wdlStringLookup, PureStandardLibraryFunctions) match { case Success(wdlValue) => validateExpression.applyOrElse(wdlValue, (_: Any) => false) case Failure(throwable) => throw new RuntimeException(s"Expression evaluation failed due to $throwable: $wdlExpression", throwable) diff --git a/backend/src/main/scala/cromwell/backend/wdl/Command.scala b/backend/src/main/scala/cromwell/backend/wdl/Command.scala new file mode 100644 index 000000000..b0e3c9294 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/wdl/Command.scala @@ -0,0 +1,31 @@ +package cromwell.backend.wdl + +import cromwell.backend.BackendJobDescriptor +import wdl4s.EvaluatedTaskInputs +import wdl4s.expression.WdlFunctions +import wdl4s.values.WdlValue + +import scala.util.{Success, Try} + +object Command { + + /** + * Instantiate the command for this job descriptor. + * + * @param jobDescriptor jobDescriptor to instantiate the command for + * @param callEngineFunction engine functions to use to evaluate expressions inside the command + * @param inputsPreProcessor function to be applied to the task inputs before they are used to instantiate the command + * Typically this is where localization and/or file path transformation work would be done. + * The return value of the function is the inputs map that will be used to resolve variables in the command line. 
+ * @param valueMapper function to apply, during instantiation of the command line, after a variable is resolved + * @return + */ + def instantiate(jobDescriptor: BackendJobDescriptor, + callEngineFunction: WdlFunctions[WdlValue], + inputsPreProcessor: EvaluatedTaskInputs => Try[EvaluatedTaskInputs] = (i: EvaluatedTaskInputs) => Success(i), + valueMapper: WdlValue => WdlValue = identity): Try[String] = { + inputsPreProcessor(jobDescriptor.inputDeclarations) flatMap { mappedInputs => + jobDescriptor.call.task.instantiateCommand(mappedInputs, callEngineFunction, valueMapper) + } + } +} diff --git a/backend/src/main/scala/cromwell/backend/wdl/FileSystems.scala b/backend/src/main/scala/cromwell/backend/wdl/FileSystems.scala deleted file mode 100644 index 8919b0058..000000000 --- a/backend/src/main/scala/cromwell/backend/wdl/FileSystems.scala +++ /dev/null @@ -1,30 +0,0 @@ -package cromwell.backend.wdl - -import java.nio.file.{FileSystem, Path} - -import cromwell.core.PathFactory - -trait FileSystems extends PathFactory { - - /** - * Ordered list of filesystems to be used to execute wdl functions needing IO. - */ - def fileSystems: List[FileSystem] - - /** - * Function applied after a string is successfully resolved to a java.nio.Path - */ - def postMapping(path: Path): Path = path - - /** - * Function applied before a string is attempted to be resolved to a java.nio.Path - */ - def preMapping(string: String): String = string - - /** - * Use fileSystems in order to try to create a java.nio.Path from path that will be used to perform IO. - * If no filesystem is able to construct a Path from the String, an exception will be raised. 
- */ - protected final def toPath(path: String) = postMapping(buildPath(preMapping(path), fileSystems)) - -} diff --git a/backend/src/main/scala/cromwell/backend/wdl/OutputEvaluator.scala b/backend/src/main/scala/cromwell/backend/wdl/OutputEvaluator.scala new file mode 100644 index 000000000..ab6c66b0d --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/wdl/OutputEvaluator.scala @@ -0,0 +1,19 @@ +package cromwell.backend.wdl + +import cromwell.backend.BackendJobDescriptor +import cromwell.core.JobOutput +import wdl4s.LocallyQualifiedName +import wdl4s.expression.WdlStandardLibraryFunctions +import wdl4s.values.WdlValue + +import scala.util.{Success, Try} + +object OutputEvaluator { + def evaluateOutputs(jobDescriptor: BackendJobDescriptor, + wdlFunctions: WdlStandardLibraryFunctions, + postMapper: WdlValue => Try[WdlValue] = v => Success(v)): Try[Map[LocallyQualifiedName, JobOutput]] = { + jobDescriptor.call.task.evaluateOutputs(jobDescriptor.inputDeclarations, wdlFunctions, postMapper) map { outputs => + outputs mapValues JobOutput + } + } +} diff --git a/backend/src/main/scala/cromwell/backend/wdl/PureFunctions.scala b/backend/src/main/scala/cromwell/backend/wdl/PureFunctions.scala deleted file mode 100644 index 9f297d477..000000000 --- a/backend/src/main/scala/cromwell/backend/wdl/PureFunctions.scala +++ /dev/null @@ -1,60 +0,0 @@ -package cromwell.backend.wdl - -import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.types.{WdlArrayType, WdlIntegerType, WdlStringType} -import wdl4s.values.{WdlArray, WdlFile, WdlFloat, WdlInteger, WdlString, WdlValue} - -import scala.util.{Failure, Success, Try} - -case object OnlyPureFunctions extends WdlStandardLibraryFunctions with PureFunctions { - override def readFile(path: String): String = throw new NotImplementedError("readFile not available in PureNoFunctions.") - override def read_json(params: Seq[Try[WdlValue]]): Try[WdlValue] = throw new NotImplementedError("read_json not available in 
PureNoFunctions.") - override def write_json(params: Seq[Try[WdlValue]]): Try[WdlFile] = throw new NotImplementedError("write_json not available in PureNoFunctions.") - override def size(params: Seq[Try[WdlValue]]): Try[WdlFloat] = throw new NotImplementedError("size not available in PureNoFunctions.") - override def write_tsv(params: Seq[Try[WdlValue]]): Try[WdlFile] = throw new NotImplementedError("write_tsv not available in PureNoFunctions.") - override def stdout(params: Seq[Try[WdlValue]]): Try[WdlFile] = throw new NotImplementedError("stdout not available in PureNoFunctions.") - override def glob(path: String, pattern: String): Seq[String] = throw new NotImplementedError("glob not available in PureNoFunctions.") - override def writeTempFile(path: String, prefix: String, suffix: String, content: String): String = throw new NotImplementedError("writeTempFile not available in PureNoFunctions.") - override def stderr(params: Seq[Try[WdlValue]]): Try[WdlFile] = throw new NotImplementedError("stderr not available in PureNoFunctions.") -} - -trait PureFunctions { this: WdlStandardLibraryFunctions => - - def range(params: Seq[Try[WdlValue]]): Try[WdlArray] = { - def extractAndValidateArguments = params.size match { - case 1 => validateArguments(params.head) - case n => Failure(new IllegalArgumentException(s"Invalid number of parameters for engine function seq: $n. 
Ensure seq(x: WdlInteger) takes exactly 1 parameters.")) - } - - def validateArguments(value: Try[WdlValue]) = value match { - case Success(intValue: WdlValue) if WdlIntegerType.isCoerceableFrom(intValue.wdlType) => - Integer.valueOf(intValue.valueString) match { - case i if i >= 0 => Success(i) - case n => Failure(new IllegalArgumentException(s"Parameter to seq must be greater than or equal to 0 (but got $n)")) - } - case _ => Failure(new IllegalArgumentException(s"Invalid parameter for engine function seq: $value.")) - } - - extractAndValidateArguments map { intValue => WdlArray(WdlArrayType(WdlIntegerType), (0 until intValue).map(WdlInteger(_))) } - } - - override def sub(params: Seq[Try[WdlValue]]): Try[WdlString] = { - def extractArguments = params.size match { - case 3 => Success((params.head, params(1), params(2))) - case n => Failure(new IllegalArgumentException(s"Invalid number of parameters for engine function sub: $n. sub takes exactly 3 parameters.")) - } - - def validateArguments(values: (Try[WdlValue], Try[WdlValue], Try[WdlValue])) = values match { - case (Success(strValue), Success(WdlString(pattern)), Success(replaceValue)) - if WdlStringType.isCoerceableFrom(strValue.wdlType) && - WdlStringType.isCoerceableFrom(replaceValue.wdlType) => - Success((strValue.valueString, pattern, replaceValue.valueString)) - case _ => Failure(new IllegalArgumentException(s"Invalid parameters for engine function sub: $values.")) - } - - for { - args <- extractArguments - (str, pattern, replace) <- validateArguments(args) - } yield WdlString(pattern.r.replaceAllIn(str, replace)) - } -} diff --git a/backend/src/main/scala/cromwell/backend/wdl/ReadLikeFunctions.scala b/backend/src/main/scala/cromwell/backend/wdl/ReadLikeFunctions.scala index 1f06e10d8..0f3e4679e 100644 --- a/backend/src/main/scala/cromwell/backend/wdl/ReadLikeFunctions.scala +++ b/backend/src/main/scala/cromwell/backend/wdl/ReadLikeFunctions.scala @@ -1,6 +1,7 @@ package cromwell.backend.wdl import 
cromwell.backend.MemorySize +import cromwell.core.path.PathFactory import wdl4s.expression.WdlStandardLibraryFunctions import wdl4s.parser.MemoryUnit import wdl4s.types.{WdlArrayType, WdlFileType, WdlObjectType, WdlStringType} @@ -8,7 +9,7 @@ import wdl4s.values._ import scala.util.{Failure, Success, Try} -trait ReadLikeFunctions extends FileSystems { this: WdlStandardLibraryFunctions => +trait ReadLikeFunctions extends PathFactory { this: WdlStandardLibraryFunctions => import better.files._ /** @@ -27,7 +28,7 @@ trait ReadLikeFunctions extends FileSystems { this: WdlStandardLibraryFunctions wdlObjects <- WdlObject.fromTsv(contents) } yield wdlObjects - override def readFile(path: String): String = File(toPath(path)).contentAsString + override def readFile(path: String): String = File(buildPath(path)).contentAsString /** * Read all lines from the file referenced by the first parameter and return an Array[String] @@ -93,7 +94,7 @@ trait ReadLikeFunctions extends FileSystems { this: WdlStandardLibraryFunctions for { value <- wdlValue unit <- convertTo - } yield MemorySize(File(toPath(value.valueString)).size.toDouble, MemoryUnit.Bytes).to(unit).amount + } yield MemorySize(File(buildPath(value.valueString)).size.toDouble, MemoryUnit.Bytes).to(unit).amount } params match { diff --git a/backend/src/main/scala/cromwell/backend/wdl/WriteFunctions.scala b/backend/src/main/scala/cromwell/backend/wdl/WriteFunctions.scala index cb8eea297..0f602c76e 100644 --- a/backend/src/main/scala/cromwell/backend/wdl/WriteFunctions.scala +++ b/backend/src/main/scala/cromwell/backend/wdl/WriteFunctions.scala @@ -16,21 +16,17 @@ trait WriteFunctions { this: WdlStandardLibraryFunctions => */ def writeDirectory: Path - private lazy val absoluteDirectory = { - File(writeDirectory).createDirectories().path - } - - override def tempFilePath = absoluteDirectory.toString + private lazy val _writeDirectory = File(writeDirectory).createDirectories() def writeTempFile(path: String,prefix: 
String,suffix: String,content: String): String = throw new NotImplementedError("This method is not used anywhere and should be removed") private def writeContent(baseName: String, content: String): Try[WdlFile] = { - val fullPath = File(absoluteDirectory)./(s"$baseName${content.md5Sum}.tmp") + val tmpFile = _writeDirectory / s"$baseName-${content.md5Sum}.tmp" Try { - if (!fullPath.exists) fullPath.write(content) + if (tmpFile.notExists) tmpFile.write(content) } map { _ => - WdlFile(fullPath.pathAsString) + WdlFile(tmpFile.uri.toString) } } diff --git a/backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowFileSystemProvider.scala b/backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowFileSystemProvider.scala deleted file mode 100644 index a10b46a0d..000000000 --- a/backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowFileSystemProvider.scala +++ /dev/null @@ -1,9 +0,0 @@ -package cromwell.backend.wfs - -import java.nio.file.FileSystems - -object DefaultWorkflowFileSystemProvider extends WorkflowFileSystemProvider { - override def fileSystemOption(params: WorkflowFileSystemProviderParams) = { - Option(FileSystems.getDefault) - } -} diff --git a/backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowPathBuilder.scala b/backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowPathBuilder.scala new file mode 100644 index 000000000..43af11d87 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/wfs/DefaultWorkflowPathBuilder.scala @@ -0,0 +1,8 @@ +package cromwell.backend.wfs + +import cromwell.core.path.DefaultPathBuilder + + +object DefaultWorkflowPathBuilder extends WorkflowPathBuilder { + override def pathBuilderOption(params: WorkflowFileSystemProviderParams) = Option(DefaultPathBuilder) +} diff --git a/backend/src/main/scala/cromwell/backend/wfs/WorkflowFileSystemProvider.scala b/backend/src/main/scala/cromwell/backend/wfs/WorkflowFileSystemProvider.scala deleted file mode 100644 index de8272473..000000000 --- 
a/backend/src/main/scala/cromwell/backend/wfs/WorkflowFileSystemProvider.scala +++ /dev/null @@ -1,34 +0,0 @@ -package cromwell.backend.wfs - -import java.nio.file.FileSystem - -import com.typesafe.config.{Config, ConfigFactory} -import cromwell.backend.io.WorkflowPaths -import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor} -import cromwell.core.WorkflowOptions -import net.ceedubs.ficus.Ficus._ - -import scala.concurrent.ExecutionContext - -object WorkflowFileSystemProvider { - def workflowPaths(configurationDescriptor: BackendConfigurationDescriptor, - workflowDescriptor: BackendWorkflowDescriptor, - providers: Traversable[WorkflowFileSystemProvider], - fileSystemExecutionContext: ExecutionContext): WorkflowPaths = { - val backendConfig = configurationDescriptor.backendConfig - val fileSystemConfig = backendConfig.as[Option[Config]]("filesystems").getOrElse(ConfigFactory.empty()) - val globalConfig = configurationDescriptor.globalConfig - val params = WorkflowFileSystemProviderParams(fileSystemConfig, globalConfig, workflowDescriptor.workflowOptions, - fileSystemExecutionContext) - val fileSystems = providers.flatMap(_.fileSystemOption(params)).toList - new WorkflowPaths(workflowDescriptor, configurationDescriptor.backendConfig, fileSystems) - } -} - -final case class WorkflowFileSystemProviderParams(fileSystemConfig: Config, globalConfig: Config, - workflowOptions: WorkflowOptions, - fileSystemExecutionContext: ExecutionContext) - -trait WorkflowFileSystemProvider { - def fileSystemOption(params: WorkflowFileSystemProviderParams): Option[FileSystem] -} diff --git a/backend/src/main/scala/cromwell/backend/wfs/WorkflowPathBuilder.scala b/backend/src/main/scala/cromwell/backend/wfs/WorkflowPathBuilder.scala new file mode 100644 index 000000000..bd39ae2c9 --- /dev/null +++ b/backend/src/main/scala/cromwell/backend/wfs/WorkflowPathBuilder.scala @@ -0,0 +1,25 @@ +package cromwell.backend.wfs + +import com.typesafe.config.Config +import 
cromwell.backend.io.{WorkflowPathsWithDocker, WorkflowPaths} +import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescriptor} +import cromwell.core.WorkflowOptions +import cromwell.core.path.PathBuilder + +import scala.concurrent.ExecutionContext + +object WorkflowPathBuilder { + def workflowPaths(configurationDescriptor: BackendConfigurationDescriptor, + workflowDescriptor: BackendWorkflowDescriptor, + pathBuilders: List[PathBuilder]): WorkflowPaths = { + new WorkflowPathsWithDocker(workflowDescriptor, configurationDescriptor.backendConfig, pathBuilders) + } +} + +final case class WorkflowFileSystemProviderParams(fileSystemConfig: Config, globalConfig: Config, + workflowOptions: WorkflowOptions, + fileSystemExecutionContext: ExecutionContext) + +trait WorkflowPathBuilder { + def pathBuilderOption(params: WorkflowFileSystemProviderParams): Option[PathBuilder] +} diff --git a/backend/src/test/scala/cromwell/backend/BackendSpec.scala b/backend/src/test/scala/cromwell/backend/BackendSpec.scala index c5617ab43..7e2354bd5 100644 --- a/backend/src/test/scala/cromwell/backend/BackendSpec.scala +++ b/backend/src/test/scala/cromwell/backend/BackendSpec.scala @@ -1,18 +1,20 @@ package cromwell.backend import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, FailedNonRetryableResponse, FailedRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobFailedNonRetryableResponse, JobFailedRetryableResponse, JobSucceededResponse} import cromwell.backend.io.TestWorkflows._ import cromwell.core.{WorkflowId, WorkflowOptions} +import wdl4s.util.AggregatedException import org.scalatest.Matchers import org.scalatest.concurrent.ScalaFutures import org.scalatest.time.{Millis, Seconds, Span} +import org.specs2.mock.Mockito import spray.json.{JsObject, JsValue} import wdl4s._ import wdl4s.expression.NoFunctions import wdl4s.values.WdlValue -trait 
BackendSpec extends ScalaFutures with Matchers { +trait BackendSpec extends ScalaFutures with Matchers with Mockito { implicit val defaultPatience = PatienceConfig(timeout = Span(5, Seconds), interval = Span(500, Millis)) @@ -26,32 +28,44 @@ trait BackendSpec extends ScalaFutures with Matchers { runtime: String = "") = { BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(wdl.replaceAll("RUNTIME", runtime)), + WdlNamespaceWithWorkflow.load(wdl.replaceAll("RUNTIME", runtime), Seq.empty[ImportResolver]).workflow, inputs, options ) } + def fqnMapToDeclarationMap(m: Map[String, WdlValue]): Map[Declaration, WdlValue] = { + m map { + case (fqn, v) => + val mockDeclaration = mock[Declaration] + mockDeclaration.fullyQualifiedName returns fqn + mockDeclaration.unqualifiedName returns fqn.split('.').lastOption.getOrElse(fqn) + mockDeclaration -> v + } + } + def jobDescriptorFromSingleCallWorkflow(workflowDescriptor: BackendWorkflowDescriptor, inputs: Map[String, WdlValue], options: WorkflowOptions, runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition]): BackendJobDescriptor = { - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head val jobKey = BackendJobDescriptorKey(call, None, 1) - val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(call.task.runtimeAttributes, NoFunctions, inputs).get // .get is OK here because this is a test + val inputDeclarations = call.evaluateTaskInputs(inputs, NoFunctions) + val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(call.task.runtimeAttributes, NoFunctions, inputDeclarations).get // .get is OK here because this is a test val runtimeAttributes = RuntimeAttributeDefinition.addDefaultsToAttributes(runtimeAttributeDefinitions, options)(evaluatedAttributes) - BackendJobDescriptor(workflowDescriptor, jobKey, runtimeAttributes, inputs) + BackendJobDescriptor(workflowDescriptor, jobKey, 
runtimeAttributes, inputDeclarations) } def jobDescriptorFromSingleCallWorkflow(wdl: WdlSource, options: WorkflowOptions, runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition]): BackendJobDescriptor = { val workflowDescriptor = buildWorkflowDescriptor(wdl) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head val jobKey = BackendJobDescriptorKey(call, None, 1) - val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(call.task.runtimeAttributes, NoFunctions, workflowDescriptor.inputs).get // .get is OK here because this is a test + val inputDeclarations = fqnMapToDeclarationMap(workflowDescriptor.inputs) + val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(call.task.runtimeAttributes, NoFunctions, inputDeclarations).get // .get is OK here because this is a test val runtimeAttributes = RuntimeAttributeDefinition.addDefaultsToAttributes(runtimeAttributeDefinitions, options)(evaluatedAttributes) - BackendJobDescriptor(workflowDescriptor, jobKey, runtimeAttributes, workflowDescriptor.inputs) + BackendJobDescriptor(workflowDescriptor, jobKey, runtimeAttributes, inputDeclarations) } def jobDescriptorFromSingleCallWorkflow(wdl: WdlSource, @@ -60,16 +74,17 @@ trait BackendSpec extends ScalaFutures with Matchers { options: WorkflowOptions, runtimeAttributeDefinitions: Set[RuntimeAttributeDefinition]): BackendJobDescriptor = { val workflowDescriptor = buildWorkflowDescriptor(wdl, runtime = runtime) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head val jobKey = BackendJobDescriptorKey(call, None, attempt) - val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(call.task.runtimeAttributes, NoFunctions, workflowDescriptor.inputs).get // .get is OK here because this is a test + val inputDeclarations = fqnMapToDeclarationMap(workflowDescriptor.inputs) + 
val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(call.task.runtimeAttributes, NoFunctions, inputDeclarations).get // .get is OK here because this is a test val runtimeAttributes = RuntimeAttributeDefinition.addDefaultsToAttributes(runtimeAttributeDefinitions, options)(evaluatedAttributes) - BackendJobDescriptor(workflowDescriptor, jobKey, runtimeAttributes, workflowDescriptor.inputs) + BackendJobDescriptor(workflowDescriptor, jobKey, runtimeAttributes, inputDeclarations) } def assertResponse(executionResponse: BackendJobExecutionResponse, expectedResponse: BackendJobExecutionResponse) = { (executionResponse, expectedResponse) match { - case (SucceededResponse(_, _, responseOutputs, _, _), SucceededResponse(_, _, expectedOutputs, _, _)) => + case (JobSucceededResponse(_, _, responseOutputs, _, _), JobSucceededResponse(_, _, expectedOutputs, _, _)) => responseOutputs.size shouldBe expectedOutputs.size responseOutputs foreach { case (fqn, out) => @@ -77,16 +92,23 @@ trait BackendSpec extends ScalaFutures with Matchers { expectedOut.isDefined shouldBe true expectedOut.get.wdlValue.valueString shouldBe out.wdlValue.valueString } - case (FailedNonRetryableResponse(_, failure, _), FailedNonRetryableResponse(_, expectedFailure, _)) => + case (JobFailedNonRetryableResponse(_, failure, _), JobFailedNonRetryableResponse(_, expectedFailure, _)) => failure.getClass shouldBe expectedFailure.getClass - failure.getMessage should include(expectedFailure.getMessage) - case (FailedRetryableResponse(_, failure, _), FailedRetryableResponse(_, expectedFailure, _)) => + concatenateCauseMessages(failure) should include(expectedFailure.getMessage) + case (JobFailedRetryableResponse(_, failure, _), JobFailedRetryableResponse(_, expectedFailure, _)) => failure.getClass shouldBe expectedFailure.getClass case (response, expectation) => fail(s"Execution response $response wasn't conform to expectation $expectation") } } + private def concatenateCauseMessages(t: 
Throwable): String = t match { + case null => "" + case ae: AggregatedException => ae.getMessage + ae.exceptions.map(concatenateCauseMessages(_)).mkString + concatenateCauseMessages(ae.getCause) + case other: Throwable => other.getMessage + concatenateCauseMessages(t.getCause) + } + + def executeJobAndAssertOutputs(backend: BackendJobExecutionActor, expectedResponse: BackendJobExecutionResponse) = { whenReady(backend.execute) { executionResponse => assertResponse(executionResponse, expectedResponse) @@ -97,13 +119,13 @@ trait BackendSpec extends ScalaFutures with Matchers { ConfigFactory.parseString("{}"), ConfigFactory.load()) def firstJobDescriptorKey(workflowDescriptor: BackendWorkflowDescriptor): BackendJobDescriptorKey = { - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head BackendJobDescriptorKey(call, None, 1) } def firstJobDescriptor(workflowDescriptor: BackendWorkflowDescriptor, inputs: Map[String, WdlValue] = Map.empty) = { - BackendJobDescriptor(workflowDescriptor, firstJobDescriptorKey(workflowDescriptor), Map.empty, inputs) + BackendJobDescriptor(workflowDescriptor, firstJobDescriptorKey(workflowDescriptor), Map.empty, fqnMapToDeclarationMap(inputs)) } } diff --git a/backend/src/test/scala/cromwell/backend/io/JobPathsSpec.scala b/backend/src/test/scala/cromwell/backend/io/JobPathsSpec.scala index f03ca2cf9..dffd64884 100644 --- a/backend/src/test/scala/cromwell/backend/io/JobPathsSpec.scala +++ b/backend/src/test/scala/cromwell/backend/io/JobPathsSpec.scala @@ -6,7 +6,7 @@ import better.files._ import com.typesafe.config.ConfigFactory import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptorKey, BackendSpec} import org.scalatest.{FlatSpec, Matchers} -import wdl4s.Call +import wdl4s.TaskCall class JobPathsSpec extends FlatSpec with Matchers with BackendSpec { @@ -32,46 +32,46 @@ class JobPathsSpec extends FlatSpec with Matchers with BackendSpec { "JobPaths" 
should "provide correct paths for a job" in { val wd = buildWorkflowDescriptor(TestWorkflows.HelloWorld) - val call: Call = wd.workflowNamespace.workflow.calls.head + val call: TaskCall = wd.workflow.taskCalls.head val jobKey = BackendJobDescriptorKey(call, None, 1) - val jobPaths = new JobPaths(wd, backendConfig, jobKey) + val jobPaths = new JobPathsWithDocker(jobKey, wd, backendConfig) val id = wd.id jobPaths.callRoot.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello").pathAsString jobPaths.callExecutionRoot.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/execution").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution").pathAsString jobPaths.returnCode.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/execution/rc").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/rc").pathAsString jobPaths.script.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/execution/script").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/script").pathAsString jobPaths.stderr.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/execution/stderr").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/stderr").pathAsString jobPaths.stdout.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/execution/stdout").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/stdout").pathAsString jobPaths.callExecutionRoot.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/execution").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/execution").pathAsString jobPaths.callDockerRoot.toString shouldBe - File(s"/root/hello/$id/call-hello").pathAsString + 
File(s"/root/wf_hello/$id/call-hello").pathAsString jobPaths.callExecutionDockerRoot.toString shouldBe - File(s"/root/hello/$id/call-hello/execution").pathAsString - jobPaths.toDockerPath(Paths.get(s"local-cromwell-executions/hello/$id/call-hello/execution/stdout")).toString shouldBe - File(s"/root/hello/$id/call-hello/execution/stdout").pathAsString + File(s"/root/wf_hello/$id/call-hello/execution").pathAsString + jobPaths.toDockerPath(Paths.get(s"local-cromwell-executions/wf_hello/$id/call-hello/execution/stdout")).toString shouldBe + File(s"/root/wf_hello/$id/call-hello/execution/stdout").pathAsString jobPaths.toDockerPath(Paths.get("/root/dock/path")).toString shouldBe File("/root/dock/path").pathAsString val jobKeySharded = BackendJobDescriptorKey(call, Option(0), 1) - val jobPathsSharded = new JobPaths(wd, backendConfig, jobKeySharded) + val jobPathsSharded = new JobPathsWithDocker(jobKeySharded, wd, backendConfig) jobPathsSharded.callExecutionRoot.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/shard-0/execution").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/shard-0/execution").pathAsString val jobKeyAttempt = BackendJobDescriptorKey(call, None, 2) - val jobPathsAttempt = new JobPaths(wd, backendConfig, jobKeyAttempt) + val jobPathsAttempt = new JobPathsWithDocker(jobKeyAttempt, wd, backendConfig) jobPathsAttempt.callExecutionRoot.toString shouldBe - File(s"local-cromwell-executions/hello/$id/call-hello/attempt-2/execution").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/attempt-2/execution").pathAsString val jobKeyShardedAttempt = BackendJobDescriptorKey(call, Option(0), 2) - val jobPathsShardedAttempt = new JobPaths(wd, backendConfig, jobKeyShardedAttempt) + val jobPathsShardedAttempt = new JobPathsWithDocker(jobKeyShardedAttempt, wd, backendConfig) jobPathsShardedAttempt.callExecutionRoot.toString shouldBe - 
File(s"local-cromwell-executions/hello/$id/call-hello/shard-0/attempt-2/execution").pathAsString + File(s"local-cromwell-executions/wf_hello/$id/call-hello/shard-0/attempt-2/execution").pathAsString } } diff --git a/backend/src/test/scala/cromwell/backend/io/TestWorkflows.scala b/backend/src/test/scala/cromwell/backend/io/TestWorkflows.scala index 34497e2ef..28d35977d 100644 --- a/backend/src/test/scala/cromwell/backend/io/TestWorkflows.scala +++ b/backend/src/test/scala/cromwell/backend/io/TestWorkflows.scala @@ -23,7 +23,7 @@ object TestWorkflows { | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -39,7 +39,7 @@ object TestWorkflows { | } |} | - |workflow goodbye { + |workflow wf_goodbye { | call goodbye |} """.stripMargin @@ -61,7 +61,7 @@ object TestWorkflows { | RUNTIME |} | - |workflow localize { + |workflow wf_localize { | File workflowFile | call localize { input: inputFileFromCallInputs = workflowFile } |} @@ -76,7 +76,7 @@ object TestWorkflows { | } |} | - |workflow abort { + |workflow wf_abort { | call abort |} """.stripMargin @@ -93,7 +93,7 @@ object TestWorkflows { | } |} | - |workflow scattering { + |workflow wf_scattering { | Array[Int] numbers = [1, 2, 3] | scatter (i in numbers) { | call scattering { input: intNumber = i } @@ -117,7 +117,7 @@ object TestWorkflows { | } |} | - |workflow localize { + |workflow wf_localize { | call localize |} """.stripMargin @@ -133,7 +133,7 @@ object TestWorkflows { | } |} | - |workflow localize { + |workflow wf_localize { | call localize |} """.stripMargin diff --git a/backend/src/test/scala/cromwell/backend/io/WorkflowPathsSpec.scala b/backend/src/test/scala/cromwell/backend/io/WorkflowPathsSpec.scala index bfae5930d..36d774bf0 100644 --- a/backend/src/test/scala/cromwell/backend/io/WorkflowPathsSpec.scala +++ b/backend/src/test/scala/cromwell/backend/io/WorkflowPathsSpec.scala @@ -2,13 +2,13 @@ package cromwell.backend.io import better.files._ import 
com.typesafe.config.Config -import cromwell.backend.BackendSpec -import org.mockito.Matchers._ +import cromwell.backend.{BackendJobBreadCrumb, BackendSpec, BackendWorkflowDescriptor} +import cromwell.core.{JobKey, WorkflowId} import org.mockito.Mockito._ -import org.scalatest.mockito.MockitoSugar import org.scalatest.{FlatSpec, Matchers} +import wdl4s.{Call, Workflow} -class WorkflowPathsSpec extends FlatSpec with Matchers with BackendSpec with MockitoSugar { +class WorkflowPathsSpec extends FlatSpec with Matchers with BackendSpec { val backendConfig = mock[Config] @@ -16,11 +16,49 @@ class WorkflowPathsSpec extends FlatSpec with Matchers with BackendSpec with Moc when(backendConfig.hasPath(any[String])).thenReturn(true) when(backendConfig.getString(any[String])).thenReturn("local-cromwell-executions") // This is the folder defined in the config as the execution root dir val wd = buildWorkflowDescriptor(TestWorkflows.HelloWorld) - val workflowPaths = new WorkflowPaths(wd, backendConfig) + val workflowPaths = new WorkflowPathsWithDocker(wd, backendConfig) val id = wd.id workflowPaths.workflowRoot.toString shouldBe - File(s"local-cromwell-executions/hello/$id").pathAsString + File(s"local-cromwell-executions/wf_hello/$id").pathAsString workflowPaths.dockerWorkflowRoot.toString shouldBe - s"/root/hello/$id" + s"/root/wf_hello/$id" + } + + "WorkflowPaths" should "provide correct paths for a sub workflow" in { + when(backendConfig.hasPath(any[String])).thenReturn(true) + when(backendConfig.getString(any[String])).thenReturn("local-cromwell-executions") // This is the folder defined in the config as the execution root dir + + val rootWd = mock[BackendWorkflowDescriptor] + val rootWorkflow = mock[Workflow] + val rootWorkflowId = WorkflowId.randomId() + rootWorkflow.unqualifiedName returns "rootWorkflow" + rootWd.workflow returns rootWorkflow + rootWd.id returns rootWorkflowId + + val subWd = mock[BackendWorkflowDescriptor] + val subWorkflow = mock[Workflow] + val 
subWorkflowId = WorkflowId.randomId() + subWorkflow.unqualifiedName returns "subWorkflow" + subWd.workflow returns subWorkflow + subWd.id returns subWorkflowId + + val call1 = mock[Call] + call1.unqualifiedName returns "call1" + val call2 = mock[Call] + call2.unqualifiedName returns "call2" + + val jobKey = new JobKey { + override def scope = call1 + override def tag: String = "tag1" + override def index: Option[Int] = Option(1) + override def attempt: Int = 2 + } + + subWd.breadCrumbs returns List(BackendJobBreadCrumb(rootWorkflow, rootWorkflowId, jobKey)) + subWd.id returns subWorkflowId + + val workflowPaths = new WorkflowPathsWithDocker(subWd, backendConfig) + workflowPaths.workflowRoot.toString shouldBe File(s"local-cromwell-executions/rootWorkflow/$rootWorkflowId/call-call1/shard-1/attempt-2/subWorkflow/$subWorkflowId").pathAsString + workflowPaths.dockerWorkflowRoot.toString shouldBe s"/root/rootWorkflow/$rootWorkflowId/call-call1/shard-1/attempt-2/subWorkflow/$subWorkflowId" } } diff --git a/backend/src/test/scala/cromwell/backend/validation/RuntimeAttributesDefaultSpec.scala b/backend/src/test/scala/cromwell/backend/validation/RuntimeAttributesDefaultSpec.scala index d1df9d790..325f874b8 100644 --- a/backend/src/test/scala/cromwell/backend/validation/RuntimeAttributesDefaultSpec.scala +++ b/backend/src/test/scala/cromwell/backend/validation/RuntimeAttributesDefaultSpec.scala @@ -80,7 +80,7 @@ class RuntimeAttributesDefaultSpec extends FlatSpec with Matchers { val defaults = workflowOptionsDefault(workflowOptions, coercionMap) defaults.isFailure shouldBe true - defaults.failed.get.getMessage shouldBe s"Could not parse JsonValue ${map("str")} to valid WdlValue for runtime attribute str" + defaults.failed.get.getMessage shouldBe s": RuntimeException: Could not parse JsonValue ${map("str")} to valid WdlValue for runtime attribute str" } it should "fold default values" in { diff --git 
a/backend/src/test/scala/cromwell/backend/wdl/PureStandardLibraryFunctionsSpec.scala b/backend/src/test/scala/cromwell/backend/wdl/PureStandardLibraryFunctionsSpec.scala new file mode 100644 index 000000000..fb6507377 --- /dev/null +++ b/backend/src/test/scala/cromwell/backend/wdl/PureStandardLibraryFunctionsSpec.scala @@ -0,0 +1,30 @@ +package cromwell.backend.wdl + +import org.scalatest.{FlatSpec, Matchers} +import wdl4s.expression.PureStandardLibraryFunctions +import wdl4s.types.{WdlArrayType, WdlIntegerType} +import wdl4s.values.{WdlArray, WdlInteger} + +import scala.util.Success + + +class PureStandardLibraryFunctionsSpec extends FlatSpec with Matchers { + + behavior of "transpose" + + it should "transpose a 2x3 into a 3x2" in { + val inArray = WdlArray(WdlArrayType(WdlArrayType(WdlIntegerType)), List( + WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(1), WdlInteger(2), WdlInteger(3))), + WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(4), WdlInteger(5), WdlInteger(6))) + )) + + val expectedResult = WdlArray(WdlArrayType(WdlArrayType(WdlIntegerType)), List( + WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(1), WdlInteger(4))), + WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(2), WdlInteger(5))), + WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(3), WdlInteger(6))) + )) + + PureStandardLibraryFunctions.transpose(Seq(Success(inArray))) should be(Success(expectedResult)) + } + +} diff --git a/build.sbt b/build.sbt index 8e666691f..bfaae9773 100644 --- a/build.sbt +++ b/build.sbt @@ -9,6 +9,7 @@ lazy val gcsFileSystem = (project in file("filesystems/gcs")) .settings(gcsFileSystemSettings:_*) .withTestSettings .dependsOn(core) + .dependsOn(core % "test->test") lazy val databaseSql = (project in file("database/sql")) .settings(databaseSqlSettings:_*) diff --git a/core/src/main/resources/logback.xml b/core/src/main/resources/logback.xml new file mode 100644 index 000000000..e165bc8b2 --- /dev/null +++ 
b/core/src/main/resources/logback.xml @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + %date %X{sourceThread} %-5level - %msg%n + + + + + + + + + + + + + + + + + + + + + + + + + + + ${FILEROLLER_DIR}/${FILEROLLER_NAME} + + + + + + ${FILEROLLER_DIR}/${FILEROLLER_NAMEPATTERN}-${FILEROLLER_NAME} + + + ${FILEROLLER_DIR}/%d{yyyyMMdd}-${FILEROLLER_NAME} + + + + ${FILEROLLER_MAXHISTORY} + + + + ${FILEROLLER_SIZECAP} + + + + + + %d{yyyy-MM-dd HH:mm:ss,SSS} [%thread] %-5level %logger{35} - %msg%n + + + + + + + + + + + + + + + + + diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index 9bf2807ea..52df9e771 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -56,7 +56,7 @@ akka { system { # If 'true', a SIGINT will trigger Cromwell to attempt to abort all currently running jobs before exiting - abort-jobs-on-terminate = false + #abort-jobs-on-terminate = false # Max number of retries per job that the engine will attempt in case of a retryable failure received from the backend max-retries = 10 @@ -97,7 +97,14 @@ workflow-options { // Optional call-caching configuration. call-caching { + # Allows re-use of existing results for jobs you've already run + # (default: false) enabled = false + + # Whether to invalidate a cache result forever if we cannot reuse them. Disable this if you expect some cache copies + # to fail for external reasons which should not invalidate the cache (e.g. auth differences between users): + # (default: true) + invalidate-bad-cache-results = true } google { @@ -149,7 +156,7 @@ backend { run-in-background = true runtime-attributes = "String? docker" submit = "/bin/bash ${script}" - submit-docker = "docker run --rm -v ${cwd}:${docker_cwd} -i ${docker} /bin/bash < ${script}" + submit-docker = "docker run --rm -v ${cwd}:${docker_cwd} -i ${docker} /bin/bash ${docker_cwd}/execution/script" # Root directory where Cromwell writes job results. 
This directory must be # visible and writeable by the Cromwell process as well as the jobs that Cromwell @@ -163,6 +170,7 @@ backend { ] caching { + # When copying a cached result, what type of file duplication should occur. Attempted in the order listed below: duplication-strategy: [ "hard-link", "soft-link", "copy" ] @@ -251,7 +259,7 @@ backend { # #6. Job command. # docker { # #Allow soft links in dockerized jobs - # cmd = "docker run -w %s %s %s %s --rm %s %s" + # cmd = "docker run -w %s %s %s %s --rm %s /bin/bash -c \"%s\"" # defaultWorkingDir = "/workingDir/" # defaultOutputDir = "/output/" # } @@ -311,6 +319,15 @@ backend { # # Base bucket for workflow executions # root = "gs://my-cromwell-workflows-bucket" # + # # Set this to the lower of the two values "Queries per 100 seconds" and "Queries per 100 seconds per user" for + # # your project. + # # + # # Used to help determine maximum throughput to the Google Genomics API. Setting this value too low will + # # cause a drop in performance. Setting this value too high will cause QPS based locks from Google. + # # 1000 is the default "Queries per 100 seconds per user", 50000 is the default "Queries per 100 seconds" + # # See https://cloud.google.com/genomics/quotas for more information + # genomics-api-queries-per-100-seconds = 1000 + # # # Polling for completion backs-off gradually for slower-running jobs. # # This is the maximum polling interval (in seconds): # maximum-polling-interval = 600 @@ -325,6 +342,12 @@ backend { # # A reference to an auth defined in the `google` stanza at the top. This auth is used to create # # Pipelines and manipulate auth JSONs. 
# auth = "application-default" + # + # // alternative service account to use on the launched compute instance + # // NOTE: If combined with service account authorization, both that serivce account and this service account + # // must be able to read and write to the 'root' GCS path + # compute-service-account = "default" + # # # Endpoint for APIs, no reason to change this unless directed by Google. # endpoint-url = "https://genomics.googleapis.com/" # } @@ -338,6 +361,15 @@ backend { # } #} + #AWS { + # actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory" + # config { + # ## These two settings are required to authenticate with the ECS service: + # accessKeyId = "..." + # secretKey = "..." + # } + #} + } } diff --git a/core/src/main/scala/cromwell/core/CallKey.scala b/core/src/main/scala/cromwell/core/CallKey.scala new file mode 100644 index 000000000..547eadabd --- /dev/null +++ b/core/src/main/scala/cromwell/core/CallKey.scala @@ -0,0 +1,7 @@ +package cromwell.core + +import wdl4s.Call + +trait CallKey extends JobKey { + def scope: Call +} diff --git a/core/src/main/scala/cromwell/core/ExecutionStatus.scala b/core/src/main/scala/cromwell/core/ExecutionStatus.scala index 76acb29a4..353b44d65 100644 --- a/core/src/main/scala/cromwell/core/ExecutionStatus.scala +++ b/core/src/main/scala/cromwell/core/ExecutionStatus.scala @@ -2,7 +2,7 @@ package cromwell.core object ExecutionStatus extends Enumeration { type ExecutionStatus = Value - val NotStarted, Starting, Running, Failed, Preempted, Done, Aborted = Value + val NotStarted, QueuedInCromwell, Starting, Running, Failed, Preempted, Done, Aborted = Value val TerminalStatuses = Set(Failed, Done, Aborted, Preempted) implicit class EnhancedExecutionStatus(val status: ExecutionStatus) extends AnyVal { diff --git a/core/src/main/scala/cromwell/core/ExecutionStore.scala b/core/src/main/scala/cromwell/core/ExecutionStore.scala deleted file mode 100644 index 1632061ce..000000000 --- 
a/core/src/main/scala/cromwell/core/ExecutionStore.scala +++ /dev/null @@ -1,13 +0,0 @@ -package cromwell.core - -import cromwell.core.ExecutionStatus._ - - -object ExecutionStore { - def empty = ExecutionStore(Map.empty) - type ExecutionStoreEntry = (JobKey, ExecutionStatus) -} - -case class ExecutionStore(store: Map[JobKey, ExecutionStatus]) { - def add(values: Map[JobKey, ExecutionStatus]) = this.copy(store = store ++ values) -} diff --git a/core/src/main/scala/cromwell/core/JobKey.scala b/core/src/main/scala/cromwell/core/JobKey.scala index 374e3d0eb..9fd22b31e 100644 --- a/core/src/main/scala/cromwell/core/JobKey.scala +++ b/core/src/main/scala/cromwell/core/JobKey.scala @@ -1,9 +1,9 @@ package cromwell.core -import wdl4s.Scope +import wdl4s.{GraphNode, Scope} trait JobKey { - def scope: Scope + def scope: Scope with GraphNode def index: Option[Int] def attempt: Int def tag: String @@ -12,4 +12,6 @@ trait JobKey { import ExecutionIndex.IndexEnhancedIndex s"${scope.fullyQualifiedName}:${index.fromIndex}:$attempt" } + + def isShard = index.isDefined } diff --git a/core/src/main/scala/cromwell/core/OutputStore.scala b/core/src/main/scala/cromwell/core/OutputStore.scala deleted file mode 100644 index 38bee68db..000000000 --- a/core/src/main/scala/cromwell/core/OutputStore.scala +++ /dev/null @@ -1,40 +0,0 @@ -package cromwell.core - -import cromwell.core.ExecutionIndex._ -import cromwell.core.OutputStore.{OutputCallKey, OutputEntry} -import wdl4s.types.WdlType -import wdl4s.util.TryUtil -import wdl4s.values.{WdlCallOutputsObject, WdlValue} -import wdl4s.{Call, Scope} - -import scala.language.postfixOps -import scala.util.{Failure, Success, Try} - -object OutputStore { - case class OutputEntry(name: String, wdlType: WdlType, wdlValue: Option[WdlValue]) - case class OutputCallKey(call: Scope, index: ExecutionIndex) - def empty = OutputStore(Map.empty) -} - -case class OutputStore(store: Map[OutputCallKey, Traversable[OutputEntry]]) { - def add(values: 
Map[OutputCallKey, Traversable[OutputEntry]]) = this.copy(store = store ++ values) - - def fetchCallOutputEntries(call: Call, index: ExecutionIndex): Try[WdlCallOutputsObject] = { - def outputEntriesToMap(outputs: Traversable[OutputEntry]): Map[String, Try[WdlValue]] = { - outputs map { output => - output.wdlValue match { - case Some(wdlValue) => output.name -> Success(wdlValue) - case None => output.name -> Failure(new RuntimeException(s"Could not retrieve output ${output.name} value")) - } - } toMap - } - - store.get(OutputCallKey(call, index)) match { - case Some(outputs) => - TryUtil.sequenceMap(outputEntriesToMap(outputs), s"Output fetching for call ${call.unqualifiedName}") map { outputsMap => - WdlCallOutputsObject(call, outputsMap) - } - case None => Failure(new RuntimeException(s"Could not find call ${call.unqualifiedName}")) - } - } -} diff --git a/core/src/main/scala/cromwell/core/PathFactory.scala b/core/src/main/scala/cromwell/core/PathFactory.scala deleted file mode 100644 index f85a05c95..000000000 --- a/core/src/main/scala/cromwell/core/PathFactory.scala +++ /dev/null @@ -1,135 +0,0 @@ -package cromwell.core - -import java.io.Writer -import java.nio.file.{FileSystem, Path} - -import better.files.File - -import scala.collection.immutable.Queue -import scala.util.{Success, Failure, Try} - -class FileSystemNotFound(str: String) extends CromwellFatalException( - new IllegalArgumentException(s"Could not find suitable filesystem to parse $str") -) - -trait PathFactory { - private val schemeMatcher = """([a-z]+://).*""".r - - def findFileSystem(rawString: String, fss: List[FileSystem], mapping: PartialFunction[FileSystem, Try[Path]]) = { - fss.toStream collect mapping collectFirst { case Success(p) => p } getOrElse { - throw new FileSystemNotFound(rawString) - } - } - - def buildPath(rawString: String, fileSystems: List[FileSystem]): Path = { - findFileSystem(rawString, fileSystems, { - case fs: FileSystem => - if (hasWrongScheme(rawString, fs)) { - 
Failure(new IllegalArgumentException(s"$rawString scheme doesn't match ${fs.provider.getScheme}")) - } else { - Try(fs.getPath(rawString)) - } - }) - } - - def buildFile(rawString: String, fileSystems: List[FileSystem]): File = File(buildPath(rawString, fileSystems)) - - private def hasWrongScheme(rawString: String, fileSystem: FileSystem): Boolean = { - schemeMatcher.findFirstMatchIn(rawString) match { - case Some(m) => m.group(1) != fileSystem.provider().getScheme - case _ => false - } - } -} - -object PathFactory { - def swapExt(filePath: String, oldExt: String, newExt: String): String = { - filePath.stripSuffix(oldExt) + newExt - } - - implicit class EnhancedPath(val path: Path) extends AnyVal { - def swapExt(oldExt: String, newExt: String): Path = { - path.getFileSystem.getPath(s"${path.toString.stripSuffix(oldExt)}$newExt") - } - - def untailed = UntailedWriter(path) - - def tailed(tailedSize: Int) = TailedWriter(path, tailedSize) - } - - implicit class FlushingAndClosingWriter(writer: Writer) { - /** Convenience method to flush and close in one shot. */ - def flushAndClose() = { - writer.flush() - writer.close() - } - } -} - -/** - * Used with a `ProcessLogger`, writes lines with a newline. - */ -trait PathWriter { - import better.files._ - - val path: Path - lazy val writer: Writer = File(path).newBufferedWriter - - /** - * Passed to `ProcessLogger` to add a new line. - * - * @param string Line to add to the logs. - */ - def writeWithNewline(string: String): Unit = { - writer.write(string) - writer.write("\n") - } -} - -/** - * Used with a `ProcessLogger`, writes lines with a newline. - * - * @param path Path to the log file. - */ -case class UntailedWriter(path: Path) extends PathWriter - -/** - * Used with a `ProcessLogger`, queues up the `tailedSize` number of lines. - * - * @param path Path to the log file. - * @param tailedSize Maximum number of lines to save in the internal FIFO queue. 
- */ -case class TailedWriter(path: Path, tailedSize: Int) extends PathWriter { - private var isTailed = false - private var tailedLines: Queue[String] = Queue.empty - - /** - * Passed to `ProcessLogger` to add a new line, and adds the line to the tailed queue. - * - * @param string Line to add to the logs. - */ - override def writeWithNewline(string: String): Unit = { - tailedLines :+= string - while (tailedLines.size > tailedSize) { - tailedLines = tailedLines.takeRight(tailedSize) - isTailed = true - } - super.writeWithNewline(string) - } - - /** - * Returns a descriptive tail of the `path` and the last `tailedLines` written. - * - * @return a descriptive tail of the `path` and the last `tailedLines` written. - */ - def tailString: String = { - if (tailedLines.isEmpty) { - s"Contents of $path were empty." - } else if (isTailed) { - s"Last ${tailedLines.size} of $path:\n${tailedLines.mkString("\n")}" - } else { - s"Contents of $path:\n${tailedLines.mkString("\n")}" - } - } -} - diff --git a/core/src/main/scala/cromwell/core/WorkflowMetadataKeys.scala b/core/src/main/scala/cromwell/core/WorkflowMetadataKeys.scala index 80ade0a4d..922be7ffb 100644 --- a/core/src/main/scala/cromwell/core/WorkflowMetadataKeys.scala +++ b/core/src/main/scala/cromwell/core/WorkflowMetadataKeys.scala @@ -13,9 +13,11 @@ object WorkflowMetadataKeys { val WorkflowLog = "workflowLog" val Failures = "failures" val WorkflowRoot = "workflowRoot" + val ParentWorkflowId = "parentWorkflowId" val SubmissionSection = "submittedFiles" val SubmissionSection_Workflow = "workflow" val SubmissionSection_Inputs = "inputs" val SubmissionSection_Options = "options" + val SubmissionSection_Imports = "imports" } diff --git a/core/src/main/scala/cromwell/core/WorkflowSourceFiles.scala b/core/src/main/scala/cromwell/core/WorkflowSourceFiles.scala deleted file mode 100644 index ed03f1733..000000000 --- a/core/src/main/scala/cromwell/core/WorkflowSourceFiles.scala +++ /dev/null @@ -1,9 +0,0 @@ -package 
cromwell.core - -import wdl4s.{WdlJson, WdlSource} - -/** - * Represents the collection of source files that a user submits to run a workflow - */ -final case class WorkflowSourceFiles(wdlSource: WdlSource, inputsJson: WdlJson, - workflowOptionsJson: WorkflowOptionsJson) diff --git a/core/src/main/scala/cromwell/core/WorkflowSourceFilesCollection.scala b/core/src/main/scala/cromwell/core/WorkflowSourceFilesCollection.scala new file mode 100644 index 000000000..ac7a7d6aa --- /dev/null +++ b/core/src/main/scala/cromwell/core/WorkflowSourceFilesCollection.scala @@ -0,0 +1,43 @@ +package cromwell.core + +import wdl4s.{WdlJson, WdlSource} + +/** + * Represents the collection of source files that a user submits to run a workflow + */ + +sealed trait WorkflowSourceFilesCollection { + def wdlSource: WdlSource + def inputsJson: WdlJson + def workflowOptionsJson: WorkflowOptionsJson + def importsZipFileOption: Option[Array[Byte]] = this match { + case _: WorkflowSourceFilesWithoutImports => None + case WorkflowSourceFilesWithDependenciesZip(_, _, _, importsZip) => Option(importsZip) // i.e. 
Some(importsZip) if our wiring is correct + } + + def copyOptions(workflowOptions: WorkflowOptionsJson) = this match { + case w: WorkflowSourceFilesWithoutImports => WorkflowSourceFilesWithoutImports(w.wdlSource, w.inputsJson, workflowOptions) + case w: WorkflowSourceFilesWithDependenciesZip => WorkflowSourceFilesWithDependenciesZip(w.wdlSource, w.inputsJson, workflowOptions, w.importsZip) + } +} + +object WorkflowSourceFilesCollection { + def apply(wdlSource: WdlSource, + inputsJson: WdlJson, + workflowOptionsJson: WorkflowOptionsJson, + importsFile: Option[Array[Byte]]): WorkflowSourceFilesCollection = importsFile match { + case Some(imports) => WorkflowSourceFilesWithDependenciesZip(wdlSource, inputsJson, workflowOptionsJson, imports) + case None => WorkflowSourceFilesWithoutImports(wdlSource, inputsJson, workflowOptionsJson) + } +} + +final case class WorkflowSourceFilesWithoutImports(wdlSource: WdlSource, + inputsJson: WdlJson, + workflowOptionsJson: WorkflowOptionsJson) extends WorkflowSourceFilesCollection + +final case class WorkflowSourceFilesWithDependenciesZip(wdlSource: WdlSource, + inputsJson: WdlJson, + workflowOptionsJson: WorkflowOptionsJson, + importsZip: Array[Byte]) extends WorkflowSourceFilesCollection { + override def toString = s"WorkflowSourceFilesWithDependenciesZip($wdlSource, $inputsJson, $workflowOptionsJson, <>)" +} diff --git a/core/src/main/scala/cromwell/core/callcaching/CallCachingMode.scala b/core/src/main/scala/cromwell/core/callcaching/CallCachingMode.scala index df9fb679a..ed939d6aa 100644 --- a/core/src/main/scala/cromwell/core/callcaching/CallCachingMode.scala +++ b/core/src/main/scala/cromwell/core/callcaching/CallCachingMode.scala @@ -19,7 +19,7 @@ case object CallCachingOff extends CallCachingMode { override val withoutWrite = this } -case class CallCachingActivity(readWriteMode: ReadWriteMode) extends CallCachingMode { +case class CallCachingActivity(readWriteMode: ReadWriteMode, options: CallCachingOptions = 
CallCachingOptions(invalidateBadCacheResults = true)) extends CallCachingMode { override val readFromCache = readWriteMode.r override val writeToCache = readWriteMode.w override lazy val withoutRead: CallCachingMode = if (!writeToCache) CallCachingOff else this.copy(readWriteMode = WriteCache) @@ -35,6 +35,4 @@ case object ReadCache extends ReadWriteMode { override val w = false } case object WriteCache extends ReadWriteMode { override val r = false } case object ReadAndWriteCache extends ReadWriteMode -sealed trait DockerHashingType -case object HashDockerName extends DockerHashingType -case object HashDockerNameAndLookupDockerHash extends DockerHashingType +final case class CallCachingOptions(invalidateBadCacheResults: Boolean = true) diff --git a/core/src/main/scala/cromwell/core/logging/JobLogger.scala b/core/src/main/scala/cromwell/core/logging/JobLogger.scala index 3eb96d4b5..1388f5f6b 100644 --- a/core/src/main/scala/cromwell/core/logging/JobLogger.scala +++ b/core/src/main/scala/cromwell/core/logging/JobLogger.scala @@ -9,7 +9,7 @@ trait JobLogging extends ActorLogging { this: Actor => def workflowId: WorkflowId def jobTag: String - lazy val jobLogger: Logger = new JobLogger(self.path.name, workflowId, jobTag, Option(log)) + lazy val jobLogger = new JobLogger(self.path.name, workflowId, jobTag, Option(log)) } /** diff --git a/core/src/main/scala/cromwell/core/logging/WorkflowLogger.scala b/core/src/main/scala/cromwell/core/logging/WorkflowLogger.scala index b79e9f7b6..ba339f011 100644 --- a/core/src/main/scala/cromwell/core/logging/WorkflowLogger.scala +++ b/core/src/main/scala/cromwell/core/logging/WorkflowLogger.scala @@ -15,9 +15,9 @@ import org.slf4j.helpers.NOPLogger import org.slf4j.{Logger, LoggerFactory} trait WorkflowLogging extends ActorLogging { this: Actor => - def workflowId: WorkflowId + def workflowIdForLogging: WorkflowId - lazy val workflowLogger = new WorkflowLogger(self.path.name, workflowId, Option(log)) + lazy val workflowLogger = new 
WorkflowLogger(self.path.name, workflowIdForLogging, Option(log)) } object WorkflowLogger { @@ -113,10 +113,10 @@ class WorkflowLogger(loggerName: String, import WorkflowLogger._ - val workflowLogPath = workflowLogConfiguration.map(workflowLogConfigurationActual => + lazy val workflowLogPath = workflowLogConfiguration.map(workflowLogConfigurationActual => File(workflowLogConfigurationActual.dir).createDirectories() / s"workflow.$workflowId.log").map(_.path) - val fileLogger = workflowLogPath match { + lazy val fileLogger = workflowLogPath match { case Some(path) => makeFileLogger(path, Level.toLevel(sys.props.getOrElse("LOG_LEVEL", "debug"))) case None => NOPLogger.NOP_LOGGER } diff --git a/core/src/main/scala/cromwell/core/package.scala b/core/src/main/scala/cromwell/core/package.scala index 3334bfa24..def878003 100644 --- a/core/src/main/scala/cromwell/core/package.scala +++ b/core/src/main/scala/cromwell/core/package.scala @@ -7,7 +7,7 @@ package object core { type FullyQualifiedName = String type WorkflowOutputs = Map[FullyQualifiedName, JobOutput] type WorkflowOptionsJson = String - type JobOutputs = Map[LocallyQualifiedName, JobOutput] + type CallOutputs = Map[LocallyQualifiedName, JobOutput] type HostInputs = Map[String, WdlValue] type EvaluatedRuntimeAttributes = Map[String, WdlValue] } diff --git a/core/src/main/scala/cromwell/core/path/CustomRetryParams.scala b/core/src/main/scala/cromwell/core/path/CustomRetryParams.scala new file mode 100644 index 000000000..b27b9135e --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/CustomRetryParams.scala @@ -0,0 +1,25 @@ +package cromwell.core.path + +import cromwell.core.retry.{Backoff, SimpleExponentialBackoff} + +import scala.concurrent.duration.Duration +import scala.concurrent.duration._ +import scala.language.postfixOps + +object CustomRetryParams { + val Default = CustomRetryParams( + timeout = Duration.Inf, + maxRetries = Option(3), + backoff = SimpleExponentialBackoff(1 seconds, 3 seconds, 1.5D), + 
isTransient = throwableToFalse, + isFatal = throwableToFalse + ) + + def throwableToFalse(t: Throwable) = false +} + +case class CustomRetryParams(timeout: Duration, + maxRetries: Option[Int], + backoff: Backoff, + isTransient: Throwable => Boolean, + isFatal: Throwable => Boolean) diff --git a/core/src/main/scala/cromwell/core/path/DefaultPathBuilder.scala b/core/src/main/scala/cromwell/core/path/DefaultPathBuilder.scala new file mode 100644 index 000000000..7dc60c1e8 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/DefaultPathBuilder.scala @@ -0,0 +1,21 @@ +package cromwell.core.path + +import java.net.URI +import java.nio.file.{FileSystems, Path} + +import scala.util.Try + +/** + * PathBuilder using the default FileSystem to attempt to build a Path. + */ +case object DefaultPathBuilder extends PathBuilder { + override def name = "Default" + + override def build(pathAsString: String): Try[Path] = Try { + val uri = URI.create(pathAsString) + val host = Option(uri.getHost) getOrElse "" + val path = Option(uri.getPath) getOrElse "" + + FileSystems.getDefault.getPath(host, path) + } +} diff --git a/core/src/main/scala/cromwell/core/path/DefaultPathBuilderFactory.scala b/core/src/main/scala/cromwell/core/path/DefaultPathBuilderFactory.scala new file mode 100644 index 000000000..5339fae3c --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/DefaultPathBuilderFactory.scala @@ -0,0 +1,8 @@ +package cromwell.core.path + +import akka.actor.ActorSystem +import cromwell.core.WorkflowOptions + +case object DefaultPathBuilderFactory extends PathBuilderFactory { + override def withOptions(options: WorkflowOptions)(implicit actorSystem: ActorSystem) = DefaultPathBuilder +} diff --git a/core/src/main/scala/cromwell/core/path/JavaWriterImplicits.scala b/core/src/main/scala/cromwell/core/path/JavaWriterImplicits.scala new file mode 100644 index 000000000..cc1b7f40d --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/JavaWriterImplicits.scala @@ -0,0 +1,13 @@ 
+package cromwell.core.path + +import java.io.Writer + +object JavaWriterImplicits { + implicit class FlushingAndClosingWriter(writer: Writer) { + /** Convenience method to flush and close in one shot. */ + def flushAndClose() = { + writer.flush() + writer.close() + } + } +} diff --git a/core/src/main/scala/cromwell/core/path/PathBuilder.scala b/core/src/main/scala/cromwell/core/path/PathBuilder.scala new file mode 100644 index 000000000..c21310192 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/PathBuilder.scala @@ -0,0 +1,10 @@ +package cromwell.core.path + +import java.nio.file.Path + +import scala.util.Try + +trait PathBuilder { + def name: String + def build(pathAsString: String): Try[Path] +} diff --git a/core/src/main/scala/cromwell/core/path/PathBuilderFactory.scala b/core/src/main/scala/cromwell/core/path/PathBuilderFactory.scala new file mode 100644 index 000000000..7ee20eb2d --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/PathBuilderFactory.scala @@ -0,0 +1,11 @@ +package cromwell.core.path + +import akka.actor.ActorSystem +import cromwell.core.WorkflowOptions + +/** + * Provide a method that can instantiate a path builder with the specified workflow options. 
+ */ +trait PathBuilderFactory { + def withOptions(options: WorkflowOptions)(implicit actorSystem: ActorSystem): PathBuilder +} diff --git a/core/src/main/scala/cromwell/core/PathCopier.scala b/core/src/main/scala/cromwell/core/path/PathCopier.scala similarity index 65% rename from core/src/main/scala/cromwell/core/PathCopier.scala rename to core/src/main/scala/cromwell/core/path/PathCopier.scala index f90dad604..58eb29068 100644 --- a/core/src/main/scala/cromwell/core/PathCopier.scala +++ b/core/src/main/scala/cromwell/core/path/PathCopier.scala @@ -1,4 +1,4 @@ -package cromwell.core +package cromwell.core.path import java.io.IOException import java.nio.file.Path @@ -8,8 +8,27 @@ import better.files._ import scala.util.{Failure, Try} object PathCopier { + + /* + * Remove p1 from p2 as long as they match. + */ + private def truncateCommonRoot(p1: Path, p2: Path): String = { + def names(p: Path) = 0 until p.getNameCount map p.getName + + val names1 = names(p1) + + val truncated = names(p2).zipWithIndex.dropWhile { + case (n1, n2) => n2 < names1.size && n1.equals(names1(n2)) + } map { _._1 } + + truncated match { + case empty if empty.isEmpty => "" + case truncs => truncs.reduceLeft(_.resolve(_)).toString + } + } + def getDestinationFilePath(sourceContextPath: Path, sourceFilePath: Path, destinationDirPath: Path): Path = { - val relativeFileString = sourceContextPath.toAbsolutePath.relativize(sourceFilePath.toAbsolutePath).toString + val relativeFileString = truncateCommonRoot(sourceContextPath.toAbsolutePath, sourceFilePath.toAbsolutePath) destinationDirPath.resolve(relativeFileString) } diff --git a/core/src/main/scala/cromwell/core/path/PathFactory.scala b/core/src/main/scala/cromwell/core/path/PathFactory.scala new file mode 100644 index 000000000..ff050b559 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/PathFactory.scala @@ -0,0 +1,57 @@ +package cromwell.core.path + +import java.nio.file.Path + +import better.files.File + +import scala.util.Success 
+ +/** + * Convenience trait delegating to the PathFactory singleton + */ +trait PathFactory { + /** + * Path builders to be applied (in order) to attempt to build a java.nio.Path from a string. + */ + def pathBuilders: List[PathBuilder] + + /** + * Function applied after a string is successfully resolved to a java.nio.Path + */ + def postMapping(path: Path): Path = path + + /** + * Function applied before a string is attempted to be resolved to a java.nio.Path + */ + def preMapping(string: String): String = string + + /** + * Attempts to build a java.nio.Path from a String + */ + def buildPath(string: String): Path = PathFactory.buildPath(string, pathBuilders, preMapping, postMapping) + + /** + * Attempts to build a better.files.File from a String + */ + def buildFile(string: String): File = PathFactory.buildFile(string, pathBuilders, preMapping, postMapping) +} + +object PathFactory { + /** + * Attempts to build a java.nio.Path from a String + */ + def buildPath(string: String, + pathBuilders: List[PathBuilder], + preMapping: String => String = identity[String], + postMapping: Path => Path = identity[Path]): Path = { + pathBuilders.toStream map { _.build(preMapping(string)) } collectFirst { case Success(p) => postMapping(p) } getOrElse { + val pathBuilderNames: String = pathBuilders map { _.name } mkString ", " + throw new PathParsingException(s"Could not find suitable filesystem among $pathBuilderNames to parse $string.") + } + } + + def buildFile(string: String, + pathBuilders: List[PathBuilder], + preMapping: String => String = identity[String], + postMapping: Path => Path = identity[Path]): File = File(buildPath(string, pathBuilders, preMapping, postMapping)) +} diff --git a/core/src/main/scala/cromwell/core/path/PathImplicits.scala b/core/src/main/scala/cromwell/core/path/PathImplicits.scala new file mode 100644 index 000000000..83d4bff7c --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/PathImplicits.scala @@ -0,0 +1,15 @@ +package 
cromwell.core.path + +import java.nio.file.Path + +object PathImplicits { + implicit class EnhancedPath(val path: Path) extends AnyVal { + def swapExt(oldExt: String, newExt: String): Path = { + path.getFileSystem.getPath(s"${path.toString.stripSuffix(oldExt)}$newExt") + } + + def untailed = UntailedWriter(path) + + def tailed(tailedSize: Int) = TailedWriter(path, tailedSize) + } +} diff --git a/core/src/main/scala/cromwell/core/path/PathParsingException.scala b/core/src/main/scala/cromwell/core/path/PathParsingException.scala new file mode 100644 index 000000000..9bf6b5a7c --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/PathParsingException.scala @@ -0,0 +1,5 @@ +package cromwell.core.path + +import cromwell.core.CromwellFatalException + +case class PathParsingException(message: String) extends CromwellFatalException(new IllegalArgumentException(message)) diff --git a/core/src/main/scala/cromwell/core/path/PathWriter.scala b/core/src/main/scala/cromwell/core/path/PathWriter.scala new file mode 100644 index 000000000..e24fb74fe --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/PathWriter.scala @@ -0,0 +1,76 @@ +package cromwell.core.path + +import java.io.Writer +import java.nio.file.Path + +import scala.collection.immutable.Queue + + + +/** + * Used with a `ProcessLogger`, writes lines with a newline. + */ +trait PathWriter { + import better.files._ + + val path: Path + lazy val writer: Writer = File(path).newBufferedWriter + + /** + * Passed to `ProcessLogger` to add a new line. + * + * @param string Line to add to the logs. + */ + def writeWithNewline(string: String): Unit = { + writer.write(string) + writer.write("\n") + } +} + +/** + * Used with a `ProcessLogger`, writes lines with a newline. + * + * @param path Path to the log file. + */ +case class UntailedWriter(path: Path) extends PathWriter + +/** + * Used with a `ProcessLogger`, queues up the `tailedSize` number of lines. + * + * @param path Path to the log file. 
+ * @param tailedSize Maximum number of lines to save in the internal FIFO queue. + */ +case class TailedWriter(path: Path, tailedSize: Int) extends PathWriter { + private var isTailed = false + private var tailedLines: Queue[String] = Queue.empty + + /** + * Passed to `ProcessLogger` to add a new line, and adds the line to the tailed queue. + * + * @param string Line to add to the logs. + */ + override def writeWithNewline(string: String): Unit = { + tailedLines :+= string + while (tailedLines.size > tailedSize) { + tailedLines = tailedLines.takeRight(tailedSize) + isTailed = true + } + super.writeWithNewline(string) + } + + /** + * Returns a descriptive tail of the `path` and the last `tailedLines` written. + * + * @return a descriptive tail of the `path` and the last `tailedLines` written. + */ + def tailString: String = { + if (tailedLines.isEmpty) { + s"Contents of $path were empty." + } else if (isTailed) { + s"Last ${tailedLines.size} of $path:\n${tailedLines.mkString("\n")}" + } else { + s"Contents of $path:\n${tailedLines.mkString("\n")}" + } + } +} + diff --git a/core/src/main/scala/cromwell/core/path/proxy/FileSystemProxy.scala b/core/src/main/scala/cromwell/core/path/proxy/FileSystemProxy.scala new file mode 100644 index 000000000..f9e9b5817 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/proxy/FileSystemProxy.scala @@ -0,0 +1,25 @@ +package cromwell.core.path.proxy + +import java.lang.Iterable +import java.nio.file._ +import java.nio.file.attribute.UserPrincipalLookupService +import java.nio.file.spi.FileSystemProvider +import java.util + +class FileSystemProxy(delegate: FileSystem, injectedProvider: FileSystemProvider) extends FileSystem { + + override def provider(): FileSystemProvider = injectedProvider + + /* delegated */ + override def supportedFileAttributeViews(): util.Set[String] = delegate.supportedFileAttributeViews() + override def getSeparator: String = delegate.getSeparator + override def getRootDirectories: Iterable[Path] = 
delegate.getRootDirectories + override def newWatchService(): WatchService = delegate.newWatchService() + override def getFileStores: Iterable[FileStore] = delegate.getFileStores + override def isReadOnly: Boolean = delegate.isReadOnly + override def getPath(first: String, more: String*): Path = new PathProxy(delegate.getPath(first, more: _*), this) + override def isOpen: Boolean = delegate.isOpen + override def close(): Unit = delegate.close() + override def getPathMatcher(syntaxAndPattern: String): PathMatcher = delegate.getPathMatcher(syntaxAndPattern) + override def getUserPrincipalLookupService: UserPrincipalLookupService = delegate.getUserPrincipalLookupService +} diff --git a/core/src/main/scala/cromwell/core/path/proxy/PathProxy.scala b/core/src/main/scala/cromwell/core/path/proxy/PathProxy.scala new file mode 100644 index 000000000..28428e0a3 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/proxy/PathProxy.scala @@ -0,0 +1,44 @@ +package cromwell.core.path.proxy + +import java.io.File +import java.net.URI +import java.nio.file.WatchEvent.{Kind, Modifier} +import java.nio.file._ +import java.util + +import scala.util.Try + +class PathProxy(delegate: Path, injectedFileSystem: FileSystem) extends Path { + def unbox[T](clazz: Class[T]): Try[T] = Try { + clazz.cast(delegate) + } + + override def getFileSystem: FileSystem = injectedFileSystem + + /* delegated */ + override def subpath(beginIndex: Int, endIndex: Int): Path = delegate.subpath(beginIndex, endIndex) + override def toFile: File = delegate.toFile + override def resolveSibling(other: Path): Path = delegate.resolveSibling(other) + override def resolveSibling(other: String): Path = delegate.resolveSibling(other) + override def isAbsolute: Boolean = delegate.isAbsolute + override def getName(index: Int): Path = delegate.getName(index) + override def getParent: Path = delegate.getParent + override def toAbsolutePath: Path = delegate.toAbsolutePath + override def relativize(other: Path): Path = 
delegate.relativize(other) + override def getNameCount: Int = delegate.getNameCount + override def toUri: URI = delegate.toUri + override def compareTo(other: Path): Int = delegate.compareTo(other) + override def register(watcher: WatchService, events: Array[Kind[_]], modifiers: Modifier*): WatchKey = delegate.register(watcher, events, modifiers: _*) + override def register(watcher: WatchService, events: Kind[_]*): WatchKey = delegate.register(watcher, events: _*) + override def getFileName: Path = delegate.getFileName + override def getRoot: Path = delegate.getRoot + override def iterator(): util.Iterator[Path] = delegate.iterator() + override def normalize(): Path = delegate.normalize() + override def endsWith(other: Path): Boolean = delegate.endsWith(other) + override def endsWith(other: String): Boolean = delegate.endsWith(other) + override def resolve(other: Path): Path = delegate.resolve(other) + override def resolve(other: String): Path = delegate.resolve(other) + override def startsWith(other: Path): Boolean = delegate.startsWith(other) + override def startsWith(other: String): Boolean = delegate.startsWith(other) + override def toRealPath(options: LinkOption*): Path = delegate.toRealPath(options: _*) +} diff --git a/core/src/main/scala/cromwell/core/path/proxy/RetryableFileSystemProviderProxy.scala b/core/src/main/scala/cromwell/core/path/proxy/RetryableFileSystemProviderProxy.scala new file mode 100644 index 000000000..db3975292 --- /dev/null +++ b/core/src/main/scala/cromwell/core/path/proxy/RetryableFileSystemProviderProxy.scala @@ -0,0 +1,57 @@ +package cromwell.core.path.proxy + +import java.net.URI +import java.nio.channels.SeekableByteChannel +import java.nio.file.DirectoryStream.Filter +import java.nio.file._ +import java.nio.file.attribute.{BasicFileAttributes, FileAttribute, FileAttributeView} +import java.nio.file.spi.FileSystemProvider +import java.util + +import akka.actor.ActorSystem +import cromwell.core.path.CustomRetryParams +import 
cromwell.core.retry.Retry + +import scala.concurrent.{Await, Future} + +class RetryableFileSystemProviderProxy[T <: FileSystemProvider](delegate: T, retryParams: CustomRetryParams = CustomRetryParams.Default)(implicit actorSystem: ActorSystem) extends FileSystemProvider { + private val iOExecutionContext = actorSystem.dispatchers.lookup("akka.dispatchers.io-dispatcher") + + // the nio interface is synchronous so we need to wait for the result + def withRetry[U](f: () => U): U = Await.result( + Retry.withRetry( + () => Future(f())(iOExecutionContext), + retryParams.maxRetries, + retryParams.backoff, + retryParams.isTransient, + retryParams.isFatal + ), + retryParams.timeout + ) + + override def getPath(uri: URI): Path = { + val path = delegate.getPath(uri) + new PathProxy(path, new FileSystemProxy(path.getFileSystem, this)) + } + override def newFileSystem(uri: URI, env: util.Map[String, _]): FileSystem = { + new FileSystemProxy(delegate.newFileSystem(uri, env), this) + } + override def getScheme: String = delegate.getScheme + override def getFileSystem(uri: URI): FileSystem = delegate.getFileSystem(uri) + override def getFileStore(path: Path): FileStore = delegate.getFileStore(path) + + /* retried operations */ + override def move(source: Path, target: Path, options: CopyOption*): Unit = withRetry { () => delegate.move(source, target, options: _*) } + override def checkAccess(path: Path, modes: AccessMode*): Unit = withRetry { () => delegate.checkAccess(path, modes: _*) } + override def createDirectory(dir: Path, attrs: FileAttribute[_]*): Unit = withRetry { () => delegate.createDirectory(dir, attrs: _*) } + override def newByteChannel(path: Path, options: util.Set[_ <: OpenOption], attrs: FileAttribute[_]*): SeekableByteChannel = withRetry { () => delegate.newByteChannel(path, options, attrs: _*) } + override def isHidden(path: Path): Boolean = withRetry { () => delegate.isHidden(path) } + override def copy(source: Path, target: Path, options: CopyOption*): Unit = 
withRetry { () => delegate.copy(source, target, options: _*) } + override def delete(path: Path): Unit = withRetry { () => delegate.delete(path) } + override def newDirectoryStream(dir: Path, filter: Filter[_ >: Path]): DirectoryStream[Path] = withRetry { () => delegate.newDirectoryStream(dir, filter) } + override def setAttribute(path: Path, attribute: String, value: scala.Any, options: LinkOption*): Unit = withRetry { () => delegate.setAttribute(path, attribute, value, options: _*) } + override def readAttributes[A <: BasicFileAttributes](path: Path, `type`: Class[A], options: LinkOption*): A = withRetry { () => delegate.readAttributes(path, `type`, options: _*) } + override def readAttributes(path: Path, attributes: String, options: LinkOption*): util.Map[String, AnyRef] = withRetry { () => delegate.readAttributes(path, attributes, options: _*) } + override def isSameFile(path: Path, path2: Path): Boolean = withRetry { () => delegate.isSameFile(path, path2) } + override def getFileAttributeView[V <: FileAttributeView](path: Path, `type`: Class[V], options: LinkOption*): V = withRetry { () => delegate.getFileAttributeView(path, `type`, options: _*) } +} diff --git a/core/src/main/scala/cromwell/core/retry/Backoff.scala b/core/src/main/scala/cromwell/core/retry/Backoff.scala index af07de595..70b3f82f4 100644 --- a/core/src/main/scala/cromwell/core/retry/Backoff.scala +++ b/core/src/main/scala/cromwell/core/retry/Backoff.scala @@ -4,7 +4,7 @@ import com.google.api.client.util.ExponentialBackOff import scala.concurrent.duration.{Duration, FiniteDuration} -sealed trait Backoff { +trait Backoff { /** Next interval in millis */ def backoffMillis: Long /** Get the next instance of backoff. 
This should be called after every call to backoffMillis */ diff --git a/core/src/main/scala/cromwell/core/retry/Retry.scala b/core/src/main/scala/cromwell/core/retry/Retry.scala index 7ac181129..002a8d6e5 100644 --- a/core/src/main/scala/cromwell/core/retry/Retry.scala +++ b/core/src/main/scala/cromwell/core/retry/Retry.scala @@ -25,7 +25,7 @@ object Retry { */ def withRetry[A](f: () => Future[A], maxRetries: Option[Int] = Option(10), - backoff: SimpleExponentialBackoff = SimpleExponentialBackoff(5 seconds, 10 seconds, 1.1D), + backoff: Backoff = SimpleExponentialBackoff(5 seconds, 10 seconds, 1.1D), isTransient: Throwable => Boolean = throwableToFalse, isFatal: Throwable => Boolean = throwableToFalse) (implicit actorSystem: ActorSystem): Future[A] = { @@ -38,7 +38,7 @@ object Retry { case throwable if isFatal(throwable) => Future.failed(new CromwellFatalException(throwable)) case throwable if !isFatal(throwable) => val retriesLeft = if (isTransient(throwable)) maxRetries else maxRetries map { _ - 1 } - after(delay, actorSystem.scheduler)(withRetry(f, backoff = backoff, maxRetries = retriesLeft)) + after(delay, actorSystem.scheduler)(withRetry(f, backoff = backoff, maxRetries = retriesLeft, isTransient = isTransient, isFatal = isFatal)) } } else f() recoverWith { case e: Exception => Future.failed(new CromwellFatalException(e)) diff --git a/core/src/main/scala/cromwell/core/simpleton/WdlValueBuilder.scala b/core/src/main/scala/cromwell/core/simpleton/WdlValueBuilder.scala index d4e04dd0b..774c1b5ce 100644 --- a/core/src/main/scala/cromwell/core/simpleton/WdlValueBuilder.scala +++ b/core/src/main/scala/cromwell/core/simpleton/WdlValueBuilder.scala @@ -1,11 +1,11 @@ package cromwell.core.simpleton import wdl4s.TaskOutput -import wdl4s.types.{WdlArrayType, WdlMapType, WdlPrimitiveType, WdlType} -import wdl4s.values.{WdlArray, WdlMap, WdlValue} +import wdl4s.types._ +import wdl4s.values.{WdlArray, WdlMap, WdlOptionalValue, WdlPair, WdlValue} import 
scala.language.postfixOps -import cromwell.core.{JobOutput, JobOutputs} +import cromwell.core.{CallOutputs, JobOutput} import cromwell.core.simpleton.WdlValueSimpleton._ @@ -73,6 +73,19 @@ object WdlValueBuilder { component.path match { case MapElementPattern(key, more) => key.unescapeMeta -> component.copy(path = more)} } + // Returns a tuple of the key into the pair (i.e. left or right) and a `SimpletonComponent` whose path reflects the "descent" + // into the pair. e.g. for a component + // SimpletonComponent(":left:foo", someValue) this would return (PairLeft -> SimpletonComponent(":foo", someValue)). + sealed trait PairLeftOrRight + case object PairLeft extends PairLeftOrRight + case object PairRight extends PairLeftOrRight + def descendIntoPair(component: SimpletonComponent): (PairLeftOrRight, SimpletonComponent) = { + component.path match { + case MapElementPattern("left", more) => PairLeft -> component.copy(path = more) + case MapElementPattern("right", more) => PairRight -> component.copy(path = more) + } + } + + // Group tuples by key using a Map with key type `K`.
def group[K](tuples: Traversable[(K, SimpletonComponent)]): Map[K, Traversable[SimpletonComponent]] = { tuples groupBy { case (i, _) => i } mapValues { _ map { case (i, s) => s} } @@ -80,6 +93,12 @@ object WdlValueBuilder { outputType match { case _: WdlPrimitiveType => components collectFirst { case SimpletonComponent(_, v) => v } get + case opt: WdlOptionalType => + if (components.isEmpty) { + WdlOptionalValue(opt.memberType, None) + } else { + WdlOptionalValue(toWdlValue(opt.memberType, components)) + } case arrayType: WdlArrayType => val groupedByArrayIndex: Map[Int, Traversable[SimpletonComponent]] = group(components map descendIntoArray) WdlArray(arrayType, groupedByArrayIndex.toList.sortBy(_._1) map { case (_, s) => toWdlValue(arrayType.memberType, s) }) @@ -87,6 +106,9 @@ object WdlValueBuilder { val groupedByMapKey: Map[String, Traversable[SimpletonComponent]] = group(components map descendIntoMap) // map keys are guaranteed by the WDL spec to be primitives, so the "coerceRawValue(..).get" is safe. WdlMap(mapType, groupedByMapKey map { case (k, ss) => mapType.keyType.coerceRawValue(k).get -> toWdlValue(mapType.valueType, ss) }) + case pairType: WdlPairType => + val groupedByLeftOrRight: Map[PairLeftOrRight, Traversable[SimpletonComponent]] = group(components map descendIntoPair) + WdlPair(toWdlValue(pairType.leftType, groupedByLeftOrRight(PairLeft)), toWdlValue(pairType.rightType, groupedByLeftOrRight(PairRight))) } } @@ -107,7 +129,7 @@ object WdlValueBuilder { */ private case class SimpletonComponent(path: String, value: WdlValue) - def toJobOutputs(taskOutputs: Traversable[TaskOutput], simpletons: Traversable[WdlValueSimpleton]): JobOutputs = { + def toJobOutputs(taskOutputs: Traversable[TaskOutput], simpletons: Traversable[WdlValueSimpleton]): CallOutputs = { toWdlValues(taskOutputs, simpletons) mapValues JobOutput.apply } @@ -119,7 +141,7 @@ object WdlValueBuilder { // This is meant to "rehydrate" simpletonized WdlValues back to WdlValues. 
It is assumed that these WdlValues were // "dehydrated" to WdlValueSimpletons correctly. This code is not robust to corrupt input whatsoever. - val types = taskOutputs map { o => o.name -> o.wdlType } toMap + val types = taskOutputs map { o => o.unqualifiedName -> o.wdlType } toMap val simpletonsByOutputName = simpletons groupBy { _.simpletonKey match { case IdentifierAndPathPattern(i, _) => i } } val simpletonComponentsByOutputName = simpletonsByOutputName map { case (name, ss) => name -> (ss map simpletonToComponent(name)) } types map { case (name, outputType) => name -> toWdlValue(outputType, simpletonComponentsByOutputName(name))} diff --git a/core/src/main/scala/cromwell/core/simpleton/WdlValueSimpleton.scala b/core/src/main/scala/cromwell/core/simpleton/WdlValueSimpleton.scala index c417ad3ee..1f5e04375 100644 --- a/core/src/main/scala/cromwell/core/simpleton/WdlValueSimpleton.scala +++ b/core/src/main/scala/cromwell/core/simpleton/WdlValueSimpleton.scala @@ -26,8 +26,10 @@ object WdlValueSimpleton { implicit class WdlValueSimplifier(wdlValue: WdlValue) { def simplify(name: String): Iterable[WdlValueSimpleton] = wdlValue match { case prim: WdlPrimitive => List(WdlValueSimpleton(name, prim)) + case opt: WdlOptionalValue => opt.value.map(_.simplify(name)).getOrElse(Seq.empty) case WdlArray(_, arrayValue) => arrayValue.zipWithIndex flatMap { case (arrayItem, index) => arrayItem.simplify(s"$name[$index]") } case WdlMap(_, mapValue) => mapValue flatMap { case (key, value) => value.simplify(s"$name:${key.valueString.escapeMeta}") } + case WdlPair(left, right) => left.simplify(s"$name:left") ++ right.simplify(s"$name:right") case wdlObject: WdlObjectLike => wdlObject.value flatMap { case (key, value) => value.simplify(s"$name:${key.escapeMeta}") } case other => throw new Exception(s"Cannot simplify wdl value $other of type ${other.wdlType}") } diff --git a/core/src/main/scala/cromwell/util/JsonFormatting/WdlValueJsonFormatter.scala 
b/core/src/main/scala/cromwell/util/JsonFormatting/WdlValueJsonFormatter.scala index 9d8638be0..8a997efb0 100644 --- a/core/src/main/scala/cromwell/util/JsonFormatting/WdlValueJsonFormatter.scala +++ b/core/src/main/scala/cromwell/util/JsonFormatting/WdlValueJsonFormatter.scala @@ -17,6 +17,7 @@ object WdlValueJsonFormatter extends DefaultJsonProtocol { case a: WdlArray => new JsArray(a.value.map(write).toVector) case m: WdlMap => new JsObject(m.value map {case(k,v) => k.valueString -> write(v)}) case e: WdlExpression => JsString(e.toWdlString) + case q: WdlPair => new JsObject(Map("left" -> write(q.left), "right" -> write(q.right))) } // NOTE: This assumes a map's keys are strings. Since we're coming from JSON this is fine. diff --git a/core/src/main/scala/cromwell/util/PromiseActor.scala b/core/src/main/scala/cromwell/util/PromiseActor.scala index 6813a53c7..58aea267a 100644 --- a/core/src/main/scala/cromwell/util/PromiseActor.scala +++ b/core/src/main/scala/cromwell/util/PromiseActor.scala @@ -4,6 +4,28 @@ import akka.actor._ import scala.concurrent.{Future, Promise} +private class PromiseActor(promise: Promise[Any], sendTo: ActorRef, msg: Any) extends Actor with ActorLogging { + + context.watch(sendTo) + sendTo ! msg + + override def receive = { + case Status.Failure(f) => + promise.tryFailure(f) + context.stop(self) + case Terminated(actorRef) => + if (actorRef == sendTo) { + promise.tryFailure(new RuntimeException("Promise-watched actor completed before sending back a message")) + } else { + log.error("Spooky happenstances! 
A Terminated({}) message was sent to a private Promise actor which wasn't watching it!?", actorRef) + } + context.stop(self) + case success => + promise.trySuccess(success) + context.stop(self) + } +} + object PromiseActor { /** * Sends a message to an actor and returns the future associated with the fullfilment of the reply @@ -16,12 +38,11 @@ object PromiseActor { */ private def askNoTimeout(message: Any, sendTo: ActorRef)(implicit actorRefFactory: ActorRefFactory): Future[Any] = { val promise = Promise[Any]() - val promiseActor = actorRefFactory.actorOf(props(promise)) - sendTo.tell(message, promiseActor) + val _ = actorRefFactory.actorOf(props(promise, sendTo, message)) promise.future } - def props(promise: Promise[Any]): Props = Props(new PromiseActor(promise)) + def props(promise: Promise[Any], sendTo: ActorRef, msg: Any): Props = Props(new PromiseActor(promise, sendTo, msg)) implicit class EnhancedActorRef(val actorRef: ActorRef) extends AnyVal { def askNoTimeout(message: Any)(implicit actorRefFactory: ActorRefFactory): Future[Any] = { @@ -29,14 +50,3 @@ object PromiseActor { } } } - -private class PromiseActor(promise: Promise[Any]) extends Actor { - override def receive = { - case Status.Failure(f) => - promise.tryFailure(f) - context.stop(self) - case success => - promise.trySuccess(success) - context.stop(self) - } -} diff --git a/core/src/main/scala/cromwell/util/StopAndLogSupervisor.scala b/core/src/main/scala/cromwell/util/StopAndLogSupervisor.scala new file mode 100644 index 000000000..4b64e5d8b --- /dev/null +++ b/core/src/main/scala/cromwell/util/StopAndLogSupervisor.scala @@ -0,0 +1,24 @@ +package cromwell.util + +import akka.actor.SupervisorStrategy.{Decider, Stop} +import akka.actor.{Actor, ActorRef, OneForOneStrategy, SupervisorStrategy} +import cromwell.core.logging.WorkflowLogging + +trait StopAndLogSupervisor { this: Actor with WorkflowLogging => + + private var failureLog: Map[ActorRef, Throwable] = Map.empty + + final val 
stopAndLogStrategy: SupervisorStrategy = { + def stoppingDecider: Decider = { + case e: Exception => + val failer = sender() + failureLog += failer -> e + Stop + } + OneForOneStrategy()(stoppingDecider) + } + + final def getFailureCause(actorRef: ActorRef): Option[Throwable] = failureLog.get(actorRef) + + override final val supervisorStrategy = stopAndLogStrategy +} diff --git a/core/src/main/scala/cromwell/util/TryUtil.scala b/core/src/main/scala/cromwell/util/TryUtil.scala new file mode 100644 index 000000000..18f7ea58a --- /dev/null +++ b/core/src/main/scala/cromwell/util/TryUtil.scala @@ -0,0 +1,45 @@ +package cromwell.util + +import java.io.{PrintWriter, StringWriter} + +import lenthall.exception.ThrowableAggregation + +import scala.util.{Success, Failure, Try} + +case class AggregatedException(exceptions: Seq[Throwable], prefixError: String = "") extends ThrowableAggregation { + override def throwables: Traversable[Throwable] = exceptions + override def exceptionContext: String = prefixError +} + +object TryUtil { + private def stringifyFailure(failure: Try[Any]): String = { + val stringWriter = new StringWriter() + val writer = new PrintWriter(stringWriter) + failure recover { case e => e.printStackTrace(writer) } + writer.flush() + writer.close() + stringWriter.toString + } + + def stringifyFailures[T](possibleFailures: Traversable[Try[T]]): Traversable[String] = + possibleFailures.collect { case failure: Failure[T] => stringifyFailure(failure) } + + private def sequenceIterable[T](tries: Iterable[Try[_]], unbox: () => T, prefixErrorMessage: String) = { + tries collect { case f: Failure[_] => f } match { + case failures if failures.nonEmpty => + val exceptions = failures.toSeq.map(_.exception) + Failure(AggregatedException(exceptions, prefixErrorMessage)) + case _ => Success(unbox()) + } + } + + def sequence[T](tries: Seq[Try[T]], prefixErrorMessage: String = ""): Try[Seq[T]] = { + def unbox = tries map { _.get } + sequenceIterable(tries, unbox _, 
prefixErrorMessage) + } + + def sequenceMap[T, U](tries: Map[T, Try[U]], prefixErrorMessage: String = ""): Try[Map[T, U]] = { + def unbox = tries mapValues { _.get } + sequenceIterable(tries.values, unbox _, prefixErrorMessage) + } +} diff --git a/core/src/test/scala/cromwell/core/path/RetryableFileSystemProxySpec.scala b/core/src/test/scala/cromwell/core/path/RetryableFileSystemProxySpec.scala new file mode 100644 index 000000000..88dbf463d --- /dev/null +++ b/core/src/test/scala/cromwell/core/path/RetryableFileSystemProxySpec.scala @@ -0,0 +1,278 @@ +package cromwell.core.path + +import java.io.FileNotFoundException +import java.nio.channels.SeekableByteChannel +import java.nio.file.DirectoryStream.Filter +import java.nio.file.attribute.{BasicFileAttributes, FileAttributeView} +import java.nio.file.spi.FileSystemProvider +import java.nio.file.{DirectoryStream, OpenOption, Path, StandardOpenOption} +import java.util.concurrent.TimeoutException + +import cromwell.core.path.proxy.RetryableFileSystemProviderProxy +import cromwell.core.retry.Backoff +import cromwell.core.{CromwellFatalException, TestKitSuite} +import org.mockito.Matchers._ +import org.mockito.Mockito._ +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.{FlatSpecLike, Matchers} + +import scala.concurrent.duration._ +import scala.language.postfixOps + +class RetryableFileSystemProxySpec extends TestKitSuite with FlatSpecLike with Matchers { + + behavior of "RetryableFileSystemProxySpec" + + case class ThrowParams(exception: Exception, nbTimes: Int) + + abstract class FileSystemAnswer[T](delay: Option[Duration] = None, + throws: Option[ThrowParams] = None) extends Answer[T] { + + var nbThrows = 0 + + def delayAndOrThrow() = { + delay foreach { d => Thread.sleep(d.toMillis) } + throws foreach { e => + if (nbThrows < e.nbTimes) { + nbThrows = nbThrows + 1 + throw e.exception + } + } + } + } + + def mockFileSystem(delay: Option[Duration] = None, + 
throws: Option[ThrowParams] = None): FileSystemProvider = { + + val provider = mock(classOf[FileSystemProvider]) + + def answerUnit: Answer[Unit] = new FileSystemAnswer[Unit](delay, throws) { + override def answer(invocation: InvocationOnMock): Unit = delayAndOrThrow() + } + + def answerBoolean: Answer[Boolean] = new FileSystemAnswer[Boolean](delay, throws) { + override def answer(invocation: InvocationOnMock): Boolean = { + delayAndOrThrow() + true + } + } + + def answerSeekableByteChannel: Answer[SeekableByteChannel] = new FileSystemAnswer[SeekableByteChannel](delay, throws) { + override def answer(invocation: InvocationOnMock): SeekableByteChannel = { + delayAndOrThrow() + mock(classOf[SeekableByteChannel]) + } + } + + def answerDirectoryStream: Answer[DirectoryStream[Path]] = new FileSystemAnswer[DirectoryStream[Path]](delay, throws) { + override def answer(invocation: InvocationOnMock): DirectoryStream[Path] = { + delayAndOrThrow() + mock(classOf[DirectoryStream[Path]]) + } + } + + def answerBasicFileAttributes: Answer[BasicFileAttributes] = new FileSystemAnswer[BasicFileAttributes](delay, throws) { + override def answer(invocation: InvocationOnMock): BasicFileAttributes = { + delayAndOrThrow() + mock(classOf[BasicFileAttributes]) + } + } + + def answerMap: Answer[java.util.Map[String, AnyRef]] = new FileSystemAnswer[java.util.Map[String, AnyRef]](delay, throws) { + override def answer(invocation: InvocationOnMock): java.util.Map[String, AnyRef] = { + delayAndOrThrow() + new java.util.HashMap[String, AnyRef]() + } + } + + def answerFileAttributeView: Answer[FileAttributeView] = new FileSystemAnswer[FileAttributeView](delay, throws) { + override def answer(invocation: InvocationOnMock): FileAttributeView = { + delayAndOrThrow() + mock(classOf[FileAttributeView]) + } + } + + when(provider.move(any[Path], any[Path])).thenAnswer(answerUnit) + when(provider.checkAccess(any[Path])).thenAnswer(answerUnit) + 
when(provider.createDirectory(any[Path])).thenAnswer(answerUnit) + when(provider.newByteChannel(any[Path], any[java.util.Set[OpenOption]])).thenAnswer(answerSeekableByteChannel) + when(provider.isHidden(any[Path])).thenAnswer(answerBoolean) + when(provider.copy(any[Path], any[Path])).thenAnswer(answerUnit) + when(provider.delete(any[Path])).thenAnswer(answerUnit) + when(provider.newDirectoryStream(any[Path], any[Filter[Path]]())).thenAnswer(answerDirectoryStream) + when(provider.setAttribute(any[Path], any[String], any[Object])).thenAnswer(answerUnit) + when(provider.readAttributes(any[Path], any[String])).thenAnswer(answerMap) + when(provider.readAttributes(any[Path], any[Class[BasicFileAttributes]])).thenAnswer(answerBasicFileAttributes) + when(provider.isSameFile(any[Path], any[Path])).thenAnswer(answerBoolean) + when(provider.getFileAttributeView(any[Path], any[Class[FileAttributeView]])).thenAnswer(answerFileAttributeView) + + provider + } + + val testRetryParams = CustomRetryParams.Default.copy(backoff = new Backoff { + override def next: Backoff = this + override def backoffMillis: Long = 0 + }) + + val pathMock = mock(classOf[Path]) + + it should "timeout if the operation takes too long" ignore { + val retryParams = testRetryParams.copy(timeout = 100 millis) + val mockFs = mockFileSystem(delay = Option(200 millis)) + val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) + + a[TimeoutException] shouldBe thrownBy(retryableFs.move(pathMock, pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.checkAccess(pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.createDirectory(pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]]))) + a[TimeoutException] shouldBe thrownBy(retryableFs.isHidden(pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.copy(pathMock, pathMock)) + a[TimeoutException] shouldBe 
thrownBy(retryableFs.delete(pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]]))) + a[TimeoutException] shouldBe thrownBy(retryableFs.setAttribute(pathMock, "", "")) + a[TimeoutException] shouldBe thrownBy(retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes])) + a[TimeoutException] shouldBe thrownBy(retryableFs.readAttributes(pathMock, "")) + a[TimeoutException] shouldBe thrownBy(retryableFs.isSameFile(pathMock, pathMock)) + a[TimeoutException] shouldBe thrownBy(retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView])) + } + + it should "retry on failure and finally succeed if under retry max" in { + val retryParams = testRetryParams.copy(maxRetries = Option(4)) + val mockFs = mockFileSystem(throws = Option(ThrowParams(new Exception(), nbTimes = 2))) + val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) + + retryableFs.move(pathMock, pathMock) + retryableFs.checkAccess(pathMock) + retryableFs.createDirectory(pathMock) + retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]])) + retryableFs.isHidden(pathMock) + retryableFs.copy(pathMock, pathMock) + retryableFs.delete(pathMock) + retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]])) + retryableFs.setAttribute(pathMock, "", "") + retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes]) + retryableFs.readAttributes(pathMock, "") + retryableFs.isSameFile(pathMock, pathMock) + retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView]) + + verify(mockFs, times(3)).move(any[Path], any[Path]) + verify(mockFs, times(3)).checkAccess(any[Path]) + verify(mockFs, times(3)).createDirectory(any[Path]) + verify(mockFs, times(3)).newByteChannel(any[Path], any[java.util.Set[OpenOption]]) + verify(mockFs, times(3)).isHidden(any[Path]) + verify(mockFs, times(3)).copy(any[Path], any[Path]) + verify(mockFs, times(3)).delete(any[Path]) + 
verify(mockFs, times(3)).newDirectoryStream(any[Path], any[Filter[Path]]()) + verify(mockFs, times(3)).setAttribute(any[Path], any[String], any[Object]) + verify(mockFs, times(3)).readAttributes(any[Path], any[String]) + verify(mockFs, times(3)).readAttributes(any[Path], any[Class[BasicFileAttributes]]) + verify(mockFs, times(3)).isSameFile(any[Path], any[Path]) + verify(mockFs, times(3)).getFileAttributeView(any[Path], any[Class[FileAttributeView]]) + } + + it should "retry on failure and fail if over retry max" in { + val retryParams = testRetryParams.copy(maxRetries = Option(2)) + val mockFs = mockFileSystem(throws = Option(ThrowParams(new IllegalArgumentException(), nbTimes = 3))) + val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) + + (the [CromwellFatalException] thrownBy retryableFs.move(pathMock, pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.checkAccess(pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.createDirectory(pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]]))).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.isHidden(pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.copy(pathMock, pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.delete(pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]]))).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.setAttribute(pathMock, "", "")).getCause shouldBe a[IllegalArgumentException] + (the 
[CromwellFatalException] thrownBy retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes])).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.readAttributes(pathMock, "")).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.isSameFile(pathMock, pathMock)).getCause shouldBe a[IllegalArgumentException] + (the [CromwellFatalException] thrownBy retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView])).getCause shouldBe a[IllegalArgumentException] + + verify(mockFs, times(3)).move(any[Path], any[Path]) + verify(mockFs, times(3)).checkAccess(any[Path]) + verify(mockFs, times(3)).createDirectory(any[Path]) + verify(mockFs, times(3)).newByteChannel(any[Path], any[java.util.Set[OpenOption]]) + verify(mockFs, times(3)).isHidden(any[Path]) + verify(mockFs, times(3)).copy(any[Path], any[Path]) + verify(mockFs, times(3)).delete(any[Path]) + verify(mockFs, times(3)).newDirectoryStream(any[Path], any[Filter[Path]]()) + verify(mockFs, times(3)).setAttribute(any[Path], any[String], any[Object]) + verify(mockFs, times(3)).readAttributes(any[Path], any[String]) + verify(mockFs, times(3)).readAttributes(any[Path], any[Class[BasicFileAttributes]]) + verify(mockFs, times(3)).isSameFile(any[Path], any[Path]) + verify(mockFs, times(3)).getFileAttributeView(any[Path], any[Class[FileAttributeView]]) + } + + it should "ignore transient exceptions" in { + def isTransient(t: Throwable) = t.isInstanceOf[FileNotFoundException] + val retryParams = testRetryParams.copy(maxRetries = Option(1), isTransient = isTransient) + val mockFs = mockFileSystem(throws = Option(ThrowParams(new FileNotFoundException(), nbTimes = 2))) + val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) + + retryableFs.move(pathMock, pathMock) + retryableFs.checkAccess(pathMock) + retryableFs.createDirectory(pathMock) + retryableFs.newByteChannel(pathMock, 
mock(classOf[java.util.Set[StandardOpenOption]])) + retryableFs.isHidden(pathMock) + retryableFs.copy(pathMock, pathMock) + retryableFs.delete(pathMock) + retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]])) + retryableFs.setAttribute(pathMock, "", "") + retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes]) + retryableFs.readAttributes(pathMock, "") + retryableFs.isSameFile(pathMock, pathMock) + retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView]) + + verify(mockFs, times(3)).move(any[Path], any[Path]) + verify(mockFs, times(3)).checkAccess(any[Path]) + verify(mockFs, times(3)).createDirectory(any[Path]) + verify(mockFs, times(3)).newByteChannel(any[Path], any[java.util.Set[OpenOption]]) + verify(mockFs, times(3)).isHidden(any[Path]) + verify(mockFs, times(3)).copy(any[Path], any[Path]) + verify(mockFs, times(3)).delete(any[Path]) + verify(mockFs, times(3)).newDirectoryStream(any[Path], any[Filter[Path]]()) + verify(mockFs, times(3)).setAttribute(any[Path], any[String], any[Object]) + verify(mockFs, times(3)).readAttributes(any[Path], any[String]) + verify(mockFs, times(3)).readAttributes(any[Path], any[Class[BasicFileAttributes]]) + verify(mockFs, times(3)).isSameFile(any[Path], any[Path]) + verify(mockFs, times(3)).getFileAttributeView(any[Path], any[Class[FileAttributeView]]) + } + + it should "fail immediately on fatal exceptions" in { + def isFatal(t: Throwable) = t.isInstanceOf[FileNotFoundException] + val retryParams = testRetryParams.copy(maxRetries = Option(5), isFatal = isFatal) + val mockFs = mockFileSystem(throws = Option(ThrowParams(new FileNotFoundException(), nbTimes = 3))) + val retryableFs = new RetryableFileSystemProviderProxy(mockFs, retryParams)(system) + + (the [CromwellFatalException] thrownBy retryableFs.move(pathMock, pathMock)).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.checkAccess(pathMock)).getCause shouldBe a[FileNotFoundException] + (the 
[CromwellFatalException] thrownBy retryableFs.createDirectory(pathMock)).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.newByteChannel(pathMock, mock(classOf[java.util.Set[StandardOpenOption]]))).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.isHidden(pathMock)).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.copy(pathMock, pathMock)).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.delete(pathMock)).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.newDirectoryStream(pathMock, mock(classOf[Filter[Path]]))).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.setAttribute(pathMock, "", "")).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.readAttributes(pathMock, classOf[BasicFileAttributes])).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.readAttributes(pathMock, "")).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.isSameFile(pathMock, pathMock)).getCause shouldBe a[FileNotFoundException] + (the [CromwellFatalException] thrownBy retryableFs.getFileAttributeView(pathMock, classOf[FileAttributeView])).getCause shouldBe a[FileNotFoundException] + + verify(mockFs, times(1)).move(any[Path], any[Path]) + verify(mockFs, times(1)).checkAccess(any[Path]) + verify(mockFs, times(1)).createDirectory(any[Path]) + verify(mockFs, times(1)).newByteChannel(any[Path], any[java.util.Set[OpenOption]]) + verify(mockFs, times(1)).isHidden(any[Path]) + verify(mockFs, times(1)).copy(any[Path], any[Path]) + verify(mockFs, times(1)).delete(any[Path]) + verify(mockFs, times(1)).newDirectoryStream(any[Path], any[Filter[Path]]()) + verify(mockFs, 
times(1)).setAttribute(any[Path], any[String], any[Object]) + verify(mockFs, times(1)).readAttributes(any[Path], any[String]) + verify(mockFs, times(1)).readAttributes(any[Path], any[Class[BasicFileAttributes]]) + verify(mockFs, times(1)).isSameFile(any[Path], any[Path]) + verify(mockFs, times(1)).getFileAttributeView(any[Path], any[Class[FileAttributeView]]) + } + +} diff --git a/core/src/test/scala/cromwell/core/simpleton/WdlValueBuilderSpec.scala b/core/src/test/scala/cromwell/core/simpleton/WdlValueBuilderSpec.scala index eab604c96..1e558ceb1 100644 --- a/core/src/test/scala/cromwell/core/simpleton/WdlValueBuilderSpec.scala +++ b/core/src/test/scala/cromwell/core/simpleton/WdlValueBuilderSpec.scala @@ -2,8 +2,10 @@ package cromwell.core.simpleton import cromwell.core.simpleton.WdlValueBuilderSpec._ import org.scalatest.{FlatSpec, Matchers} +import org.specs2.mock.Mockito +import wdl4s.parser.WdlParser.Ast import wdl4s.types.{WdlArrayType, WdlIntegerType, WdlMapType, WdlStringType} -import wdl4s.values.{WdlArray, WdlInteger, WdlMap, WdlString} +import wdl4s.values.{WdlArray, WdlInteger, WdlMap, WdlPair, WdlString, WdlValue} import wdl4s.{TaskOutput, WdlExpression} object WdlValueBuilderSpec { @@ -11,41 +13,118 @@ object WdlValueBuilderSpec { val IgnoredExpression = WdlExpression.fromString(""" "" """) } -class WdlValueBuilderSpec extends FlatSpec with Matchers { +class WdlValueBuilderSpec extends FlatSpec with Matchers with Mockito { - "Builder" should "build" in { - - val wdlValues = Map( - "foo" -> WdlString("none"), - "bar" -> WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(1), WdlInteger(2))), - "baz" -> WdlArray(WdlArrayType(WdlArrayType(WdlIntegerType)), List( + case class SimpletonConversion(name: String, wdlValue: WdlValue, simpletons: Seq[WdlValueSimpleton]) + val simpletonConversions = List( + SimpletonConversion("foo", WdlString("none"), List(WdlValueSimpleton("foo", WdlString("none")))), + SimpletonConversion("bar", 
WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(1), WdlInteger(2))), List(WdlValueSimpleton("bar[0]", WdlInteger(1)), WdlValueSimpleton("bar[1]", WdlInteger(2)))), + SimpletonConversion( + "baz", + WdlArray(WdlArrayType(WdlArrayType(WdlIntegerType)), List( WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(0), WdlInteger(1))), WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(2), WdlInteger(3))))), - "map" -> WdlMap(WdlMapType(WdlStringType, WdlStringType), Map( + List(WdlValueSimpleton("baz[0][0]", WdlInteger(0)), WdlValueSimpleton("baz[0][1]", WdlInteger(1)), WdlValueSimpleton("baz[1][0]", WdlInteger(2)), WdlValueSimpleton("baz[1][1]", WdlInteger(3))) + ), + SimpletonConversion( + "map", + WdlMap(WdlMapType(WdlStringType, WdlStringType), Map( WdlString("foo") -> WdlString("foo"), - WdlString("bar") -> WdlString("bar")) - ), - "map2" -> WdlMap(WdlMapType(WdlStringType, WdlMapType(WdlStringType, WdlStringType)), Map( - WdlString("foo") -> - WdlMap(WdlMapType(WdlStringType, WdlStringType), Map(WdlString("foo2") -> WdlString("foo"))), - WdlString("bar") -> - WdlMap(WdlMapType(WdlStringType, WdlStringType), Map(WdlString("bar2") -> WdlString("bar"))) - )), - "map3" -> WdlMap(WdlMapType(WdlStringType, WdlArrayType(WdlIntegerType)), Map( + WdlString("bar") -> WdlString("bar"))), + List(WdlValueSimpleton("map:foo", WdlString("foo")), WdlValueSimpleton("map:bar", WdlString("bar"))) + ), + SimpletonConversion( + "mapOfMaps", + WdlMap(WdlMapType(WdlStringType, WdlMapType(WdlStringType, WdlStringType)), Map( + WdlString("foo") -> WdlMap(WdlMapType(WdlStringType, WdlStringType), Map(WdlString("foo2") -> WdlString("foo"))), + WdlString("bar") ->WdlMap(WdlMapType(WdlStringType, WdlStringType), Map(WdlString("bar2") -> WdlString("bar"))))), + List(WdlValueSimpleton("mapOfMaps:foo:foo2", WdlString("foo")), WdlValueSimpleton("mapOfMaps:bar:bar2", WdlString("bar"))) + ), + SimpletonConversion( + "simplePair1", + WdlPair(WdlInteger(1), WdlString("hello")), + 
List(WdlValueSimpleton("simplePair1:left", WdlInteger(1)), WdlValueSimpleton("simplePair1:right", WdlString("hello"))) + ), + SimpletonConversion( + "simplePair2", + WdlPair(WdlString("left"), WdlInteger(5)), + List(WdlValueSimpleton("simplePair2:left", WdlString("left")), WdlValueSimpleton("simplePair2:right", WdlInteger(5))) + ), + SimpletonConversion( + "pairOfPairs", + WdlPair( + WdlPair(WdlInteger(1), WdlString("one")), + WdlPair(WdlString("two"), WdlInteger(2))), + List( + WdlValueSimpleton("pairOfPairs:left:left", WdlInteger(1)), + WdlValueSimpleton("pairOfPairs:left:right", WdlString("one")), + WdlValueSimpleton("pairOfPairs:right:left", WdlString("two")), + WdlValueSimpleton("pairOfPairs:right:right", WdlInteger(2))) + ), + SimpletonConversion( + "pairOfArrayAndMap", + WdlPair( + WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(1), WdlInteger(2))), + WdlMap(WdlMapType(WdlStringType, WdlIntegerType), Map(WdlString("left") -> WdlInteger(100), WdlString("right") -> WdlInteger(200)))), + List( + WdlValueSimpleton("pairOfArrayAndMap:left[0]", WdlInteger(1)), + WdlValueSimpleton("pairOfArrayAndMap:left[1]", WdlInteger(2)), + WdlValueSimpleton("pairOfArrayAndMap:right:left", WdlInteger(100)), + WdlValueSimpleton("pairOfArrayAndMap:right:right", WdlInteger(200))) + ), + SimpletonConversion( + "mapOfArrays", + WdlMap(WdlMapType(WdlStringType, WdlArrayType(WdlIntegerType)), Map( WdlString("foo") -> WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(0), WdlInteger(1))), - WdlString("bar") -> WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(2), WdlInteger(3)))) - ), - "map4" -> WdlMap(WdlMapType(WdlStringType, WdlStringType), Map( + WdlString("bar") -> WdlArray(WdlArrayType(WdlIntegerType), List(WdlInteger(2), WdlInteger(3))))), + List(WdlValueSimpleton("mapOfArrays:foo[0]", WdlInteger(0)), WdlValueSimpleton("mapOfArrays:foo[1]", WdlInteger(1)), + WdlValueSimpleton("mapOfArrays:bar[0]", WdlInteger(2)), WdlValueSimpleton("mapOfArrays:bar[1]", 
WdlInteger(3))) + ), + SimpletonConversion( + "escapology", + WdlMap(WdlMapType(WdlStringType, WdlStringType), Map( WdlString("foo[1]") -> WdlString("foo"), WdlString("bar[[") -> WdlString("bar"), - WdlString("baz:qux") -> WdlString("baz:qux") - )) + WdlString("baz:qux") -> WdlString("baz:qux"))), + List(WdlValueSimpleton("escapology:foo\\[1\\]", WdlString("foo")), + WdlValueSimpleton("escapology:bar\\[\\[", WdlString("bar")), + WdlValueSimpleton("escapology:baz\\:qux", WdlString("baz:qux"))) ) + ) + + behavior of "WdlValueSimpleton and WdlValueBuilder" + + simpletonConversions foreach { case SimpletonConversion(name, wdlValue, simpletons) => + it should s"decompose WdlValues into simpletons ($name)" in { + import WdlValueSimpleton._ - val taskOutputs = wdlValues map { case (k, wv) => TaskOutput(k, wv.wdlType, IgnoredExpression) } + val map = Map(name -> wdlValue) + map.simplify should contain theSameElementsAs simpletons + } + + it should s"build simpletons back into WdlValues ($name)" in { + // The task output is used to tell us the type of output we're expecting: + val taskOutputs = List(TaskOutput(name, wdlValue.wdlType, IgnoredExpression, mock[Ast], None)) + val rebuiltValues = WdlValueBuilder.toWdlValues(taskOutputs, simpletons) + rebuiltValues.size should be(1) + rebuiltValues(name) should be(wdlValue) + } + + } + + + it should "round trip everything together with no losses" in { + + val wdlValues = (simpletonConversions map { case SimpletonConversion(name, wdlValue, simpletons) => name -> wdlValue }).toMap + val taskOutputs = wdlValues map { case (k, wv) => TaskOutput(k, wv.wdlType, IgnoredExpression, mock[Ast], None) } + val allSimpletons = simpletonConversions flatMap { case SimpletonConversion(name, wdlValue, simpletons) => simpletons } import WdlValueSimpleton._ - val actual = WdlValueBuilder.toWdlValues(taskOutputs, wdlValues.simplify) + + val actualSimpletons = wdlValues.simplify + actualSimpletons should contain theSameElementsAs allSimpletons + + 
val actual = WdlValueBuilder.toWdlValues(taskOutputs, actualSimpletons) actual shouldEqual wdlValues } } diff --git a/core/src/test/scala/cromwell/util/SampleWdl.scala b/core/src/test/scala/cromwell/util/SampleWdl.scala index dc2598cf1..2e4d77399 100644 --- a/core/src/test/scala/cromwell/util/SampleWdl.scala +++ b/core/src/test/scala/cromwell/util/SampleWdl.scala @@ -4,7 +4,7 @@ import java.nio.file.{Files, Path} import java.util.UUID import better.files._ -import cromwell.core.WorkflowSourceFiles +import cromwell.core.{WorkflowSourceFilesWithoutImports} import spray.json._ import wdl4s._ import wdl4s.types.{WdlArrayType, WdlStringType} @@ -15,7 +15,7 @@ import scala.language.postfixOps trait SampleWdl extends TestFileUtil { def wdlSource(runtime: String = ""): WdlSource def asWorkflowSources(runtime: String = "", workflowOptions: String = "{}") = - WorkflowSourceFiles(wdlSource(runtime), wdlJson, workflowOptions) + WorkflowSourceFilesWithoutImports(wdlSource(runtime), wdlJson, workflowOptions) val rawInputs: WorkflowRawInputs def name = getClass.getSimpleName.stripSuffix("$") @@ -74,14 +74,14 @@ object SampleWdl { | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin.replaceAll("RUNTIME", runtime) - val Addressee = "hello.hello.addressee" + val Addressee = "wf_hello.hello.addressee" val rawInputs = Map(Addressee -> "world") - val OutputKey = "hello.hello.salutation" + val OutputKey = "wf_hello.hello.salutation" val OutputValue = "Hello world!" 
} @@ -117,7 +117,7 @@ object SampleWdl { | } |} | - |workflow goodbye { + |workflow wf_goodbye { | call goodbye |} """.stripMargin @@ -147,9 +147,10 @@ object SampleWdl { | output { | String empty = read_string(stdout()) | } + | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello | call goodbye {input: emptyInputString=hello.empty } | output { @@ -509,7 +510,7 @@ object SampleWdl { | RUNTIME |} | - |workflow whereami { + |workflow wf_whereami { | call whereami |} """.stripMargin.replaceAll("RUNTIME", runtime) @@ -661,7 +662,7 @@ object SampleWdl { class ScatterWdl extends SampleWdl { val tasks = s"""task A { | command { - | echo -n -e "jeff\nchris\nmiguel\nthibault\nkhalid\nscott" + | echo -n -e "jeff\nchris\nmiguel\nthibault\nkhalid\nruchi" | } | RUNTIME | output { diff --git a/core/src/test/scala/cromwell/util/WdlValueJsonFormatterSpec.scala b/core/src/test/scala/cromwell/util/WdlValueJsonFormatterSpec.scala new file mode 100644 index 000000000..91d678c01 --- /dev/null +++ b/core/src/test/scala/cromwell/util/WdlValueJsonFormatterSpec.scala @@ -0,0 +1,28 @@ +package cromwell.util + +import scala.Vector + +import org.scalatest.FlatSpec +import org.scalatest.Matchers + +import JsonFormatting.WdlValueJsonFormatter.WdlValueJsonFormat +import spray.json.{ JsObject, pimpString } +import wdl4s.types.{ WdlArrayType, WdlStringType } +import wdl4s.values.{ WdlArray, WdlPair, WdlString } + +class WdlValueJsonFormatterSpec extends FlatSpec with Matchers { + + behavior of "WdlValueJsonFormat" + + it should "write WdlPair to left/right structured JsObject" in { + val left = "sanders" + val right = Vector("rubio", "carson", "cruz") + val wdlPair = WdlPair(WdlString(left), WdlArray(WdlArrayType(WdlStringType), right.map { WdlString(_) })) + val ExpectedJson: JsObject = + """|{ + | "left": "sanders", + | "right": ["rubio", "carson", "cruz"] + |}""".stripMargin.parseJson.asJsObject + WdlValueJsonFormat.write(wdlPair) should matchPattern { case ExpectedJson => } + } 
+} diff --git a/database/migration/src/main/resources/changelog.xml b/database/migration/src/main/resources/changelog.xml index 251e15d79..c67137ae0 100644 --- a/database/migration/src/main/resources/changelog.xml +++ b/database/migration/src/main/resources/changelog.xml @@ -45,10 +45,15 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/database/migration/src/main/resources/changesets/workflow_store_imports_file.xml b/database/migration/src/main/resources/changesets/workflow_store_imports_file.xml new file mode 100644 index 000000000..bd6d3f051 --- /dev/null +++ b/database/migration/src/main/resources/changesets/workflow_store_imports_file.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + diff --git a/database/migration/src/main/resources/logback.xml b/database/migration/src/main/resources/logback.xml deleted file mode 100644 index fa27b5dde..000000000 --- a/database/migration/src/main/resources/logback.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - %date %X{sourceThread} %-5level - %msg%n - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/database/migration/src/main/scala/cromwell/database/migration/custom/BatchedTaskChange.scala b/database/migration/src/main/scala/cromwell/database/migration/custom/BatchedTaskChange.scala new file mode 100644 index 000000000..9339dc5d7 --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/custom/BatchedTaskChange.scala @@ -0,0 +1,139 @@ +package cromwell.database.migration.custom + +import java.sql.{PreparedStatement, ResultSet} + +import liquibase.database.jvm.JdbcConnection +import liquibase.exception.CustomChangeException + +/** + * Runs a migration as a series of batches. + */ +trait BatchedTaskChange extends MigrationTaskChange { + /** + * Returns sql to retrieve the maximum primary key for the table. 
+ * + * Example: + * {{{ + * SELECT MAX([PRIMARY_KEY]) + * FROM [TABLE]; + * }}} + */ + def readCountQuery: String + + /** + * Returns sql to retrieve rows to be passed to migrateBatchRow, batching on a primary key between the half-open + * primary key range [start, stop). + * + * Example: + * {{{ + * SELECT [COLUMNS] + * FROM [TABLE] + * WHERE [PRIMARY_KEY] >= ? AND [PRIMARY_KEY] < ?; + * }}} + */ + def readBatchQuery: String + + /** + * Used to prepare the statement that will be passed repeatedly into migrateBatchRow. + * + * Example: + * {{{ + * UPDATE [TABLE] + * SET [COLUMNS] + * WHERE [PRIMARY_KEY] = ?; + * }}} + * + * Example: + * {{{ + * INSERT INTO [TABLE] + * SET [COLUMNS]; + * }}} + */ + def migrateBatchQuery: String + + /** + * Migrate a row. + * + * Read the values from readRow, update the values, set the updated values on the migrateStatement, and then call + * migrateStatement.addBatch(). Return the (estimated) number of rows to be written by this batch. + * + * @param readRow The row to migrate + * @param migrateStatement The statement to add a new migrated row + * @return The number of rows updated + */ + def migrateBatchRow(readRow: ResultSet, migrateStatement: PreparedStatement): Int + + /** + * Specify the size of a "page". + * For databases with a very large number of rows, selecting all the rows at once can generate a variety of problems. + * In order to avoid any issue, the selection is paginated. This value sets how many rows should be retrieved and + * processed at a time, before asking for the next page. + */ + private val readBatchSize = config.getInt("database.migration.read-batch-size") + + /** + * To keep the size of the insert batch from growing out of control we monitor its size and execute/commit when it + * reaches or exceeds writeBatchSize. 
+ */ + private val writeBatchSize = config.getInt("database.migration.write-batch-size") + + override def migrate(connection: JdbcConnection) = { + + logger.info(s"Running migration $migrationName with a read batch size of " + + s"$readBatchSize and a write batch size of $writeBatchSize") + + /* + * Keep count of the size of the batch. + * + * @see writeBatchSize + */ + var batchMigrationCounter: Int = 0 + + val readCount = getReadCount(connection) + + // So we can display progress + val pageCount = Math.max(readCount / readBatchSize, 1) + + val readBatchStatement = connection.prepareStatement(readBatchQuery) + val migrateBatchStatement = connection.prepareStatement(migrateBatchQuery) + + val paginator = new QueryPaginator(readBatchStatement, readBatchSize, readCount) + + // Loop over pages + paginator.zipWithIndex foreach { + case (resultBatch, page) => + // Loop over rows in page + new ResultSetIterator(resultBatch).zipWithIndex foreach { + case (row, idx) => + batchMigrationCounter += migrateBatchRow(row, migrateBatchStatement) + // batchMigrationCounter can actually be bigger than writeBatchSize as wdlValues are processed atomically, + // so this is a best effort + if (batchMigrationCounter >= writeBatchSize) { + migrateBatchStatement.executeBatch() + connection.commit() + batchMigrationCounter = 0 + } + } + + resultBatch.close() + + val progress = Math.min((page + 1) * 100 / pageCount, 100) + logger.info(s"[$migrationName] $progress%") + } + + if (batchMigrationCounter != 0) { + migrateBatchStatement.executeBatch() + connection.commit() + } + } + + private def getReadCount(connection: JdbcConnection): Int = { + val readCountResultSet = connection.createStatement().executeQuery(readCountQuery) + + if (readCountResultSet.next()) { + readCountResultSet.getInt(1) + } else { + throw new CustomChangeException(s"Could not find max value for pagination from sql:\n$readCountQuery") + } + } +} diff --git 
a/database/migration/src/main/scala/cromwell/database/migration/custom/MigrationTaskChange.scala b/database/migration/src/main/scala/cromwell/database/migration/custom/MigrationTaskChange.scala new file mode 100644 index 000000000..4fa7935e6 --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/custom/MigrationTaskChange.scala @@ -0,0 +1,48 @@ +package cromwell.database.migration.custom + +import com.typesafe.config.ConfigFactory +import com.typesafe.scalalogging.LazyLogging +import liquibase.change.custom.CustomTaskChange +import liquibase.database.Database +import liquibase.database.jvm.JdbcConnection +import liquibase.exception.{CustomChangeException, ValidationErrors} +import liquibase.resource.ResourceAccessor + +/** + * Provides a default implementation of a liquibase custom task change. + */ +trait MigrationTaskChange extends CustomTaskChange with LazyLogging { + lazy val config = ConfigFactory.load + + /** @return name of the migration, defaulting to the class name */ + def migrationName: String = getClass.getSimpleName + + /** + * Performs the migration. + * + * @param connection the connection to the database + */ + def migrate(connection: JdbcConnection): Unit + + override def execute(database: Database): Unit = { + try { + val dbConn = database.getConnection.asInstanceOf[JdbcConnection] + val autoCommit = dbConn.getAutoCommit + dbConn.setAutoCommit(false) + migrate(dbConn) + dbConn.setAutoCommit(autoCommit) + } catch { + case customChangeException: CustomChangeException => throw customChangeException + case exception: Exception => + throw new CustomChangeException(s"Could not apply migration script for $migrationName", exception) + } + } + + override def setUp() = {} + + override def getConfirmationMessage = s"$migrationName complete." 
+ + override def validate(database: Database) = new ValidationErrors + + override def setFileOpener(resourceAccessor: ResourceAccessor) = {} +} diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/QueryPaginator.scala b/database/migration/src/main/scala/cromwell/database/migration/custom/QueryPaginator.scala similarity index 88% rename from database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/QueryPaginator.scala rename to database/migration/src/main/scala/cromwell/database/migration/custom/QueryPaginator.scala index f7929cea4..0e6514043 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/QueryPaginator.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/custom/QueryPaginator.scala @@ -1,4 +1,4 @@ -package cromwell.database.migration.metadata.table.symbol +package cromwell.database.migration.custom import java.sql.{PreparedStatement, ResultSet} diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/ResultSetIterator.scala b/database/migration/src/main/scala/cromwell/database/migration/custom/ResultSetIterator.scala similarity index 73% rename from database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/ResultSetIterator.scala rename to database/migration/src/main/scala/cromwell/database/migration/custom/ResultSetIterator.scala index 8658a59da..a9b229625 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/ResultSetIterator.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/custom/ResultSetIterator.scala @@ -1,4 +1,4 @@ -package cromwell.database.migration.metadata.table.symbol +package cromwell.database.migration.custom import java.sql.ResultSet diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/CallOutputSymbolTableMigration.scala 
b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/CallOutputSymbolTableMigration.scala index fbd5529c1..09cf42dbf 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/CallOutputSymbolTableMigration.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/CallOutputSymbolTableMigration.scala @@ -6,7 +6,6 @@ import wdl4s.values._ class CallOutputSymbolTableMigration extends SymbolTableMigration { override def processSymbol(statement: PreparedStatement, - rowIndex: Int, workflowUuid: String, symbolName: String, symbolScope: String, diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/InputSymbolTableMigration.scala b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/InputSymbolTableMigration.scala index 81756d017..5740ac3d9 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/InputSymbolTableMigration.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/InputSymbolTableMigration.scala @@ -7,7 +7,6 @@ import wdl4s.values._ class InputSymbolTableMigration extends SymbolTableMigration { override def processSymbol(statement: PreparedStatement, - rowIndex: Int, workflowUuid: String, symbolName: String, symbolScope: String, diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/MetadataStatement.scala b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/MetadataStatement.scala index a938ebcdf..311b364fe 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/MetadataStatement.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/MetadataStatement.scala @@ -4,7 +4,6 @@ import java.sql.{PreparedStatement, Timestamp, Types} import 
java.time.format.DateTimeFormatter import java.time.{OffsetDateTime, ZoneId, ZoneOffset} -import liquibase.database.jvm.JdbcConnection import org.slf4j.LoggerFactory import wdl4s.values.{WdlBoolean, WdlFloat, WdlInteger, WdlValue} @@ -18,12 +17,12 @@ object MetadataStatement { val TimestampIdx = 7 val ValueTypeIdx = 8 - def makeStatement(connection: JdbcConnection): PreparedStatement = connection.prepareStatement( + val InsertSql = """ |INSERT INTO METADATA_JOURNAL |(WORKFLOW_EXECUTION_UUID, METADATA_KEY, CALL_FQN, JOB_SCATTER_INDEX, JOB_RETRY_ATTEMPT, METADATA_VALUE, METADATA_TIMESTAMP, METADATA_VALUE_TYPE) |VALUES (?, ?, ?, ?, ?, ?, ?, ?) - """.stripMargin) + """.stripMargin implicit class OffsetDateTimeToSystemTimestamp(val offsetDateTime: OffsetDateTime) extends AnyVal { def toSystemTimestamp = Timestamp.valueOf(offsetDateTime.atZoneSameInstant(ZoneId.systemDefault).toLocalDateTime) diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/SymbolTableMigration.scala b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/SymbolTableMigration.scala index a400866a9..19bbc1279 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/SymbolTableMigration.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/SymbolTableMigration.scala @@ -2,15 +2,9 @@ package cromwell.database.migration.metadata.table.symbol import java.sql.{PreparedStatement, ResultSet} -import com.typesafe.config.ConfigFactory import cromwell.core.simpleton.WdlValueSimpleton._ import cromwell.database.migration.WdlTransformation -import liquibase.change.custom.CustomTaskChange -import liquibase.database.Database -import liquibase.database.jvm.JdbcConnection -import liquibase.exception.{CustomChangeException, ValidationErrors} -import liquibase.resource.ResourceAccessor -import org.slf4j.LoggerFactory +import 
cromwell.database.migration.custom.BatchedTaskChange import wdl4s.WdlExpression import wdl4s.types.WdlType import wdl4s.values.WdlValue @@ -25,42 +19,12 @@ object SymbolTableMigration { """.stripMargin } -trait SymbolTableMigration extends CustomTaskChange { - import SymbolTableMigration._ +trait SymbolTableMigration extends BatchedTaskChange { import cromwell.database.migration.WdlTransformation._ - // Nb of rows to retrieve / process in a batch - val config = ConfigFactory.load + override val readCountQuery = SymbolTableMigration.NbRowsQuery - /** - * Specify the size of a "page". - * For databases with a very large number of symbols, selecting all the rows at once can generate a variety of problems. - * In order to avoid any issue, the selection is paginated. This value sets how many rows should be retrieved and processed at a time, before asking for the next chunk. - */ - val readBatchSize = config.getInt("database.migration.read-batch-size") - - /** - * Because a symbol row can contain any arbitrary wdl value, the amount of metadata rows to insert from a single symbol row can vary from 1 to several thousands (or more). - * To keep the size of the insert batch from growing out of control we monitor its size and execute/commit when it reaches or exceeds writeBatchSize. 
- */ - val writeBatchSize = config.getInt("database.migration.write-batch-size") - - val logger = LoggerFactory.getLogger("LiquibaseMetadataMigration") - - override def execute(database: Database): Unit = { - try { - val dbConn = database.getConnection.asInstanceOf[JdbcConnection] - val autoCommit = dbConn.getAutoCommit - dbConn.setAutoCommit(false) - migrate(dbConn) - dbConn.setAutoCommit(autoCommit) - } catch { - case t: CustomChangeException => throw t - case t: Throwable => throw new CustomChangeException(s"Could not apply migration script for metadata at ${getClass.getSimpleName}", t) - } - } - - def tmpSymbolPaginatedStatement(connection: JdbcConnection): PreparedStatement = connection.prepareStatement(""" + override val readBatchQuery = """ |SELECT | WORKFLOW_EXECUTION_UUID, | SYMBOL_NAME, @@ -71,64 +35,14 @@ trait SymbolTableMigration extends CustomTaskChange { | WDL_VALUE | FROM TMP_SYMBOL | WHERE TMP_SYMBOL_ID >= ? AND TMP_SYMBOL_ID < ?; - """.stripMargin) - - private def migrate(connection: JdbcConnection) = { - logger.info(s"Running migration with a read batch size of $readBatchSize and a write batch size of $writeBatchSize") - - /** - * Keep count of the size of the batch. 
- * - * @see writeBatchSize - */ - var insertsCounter: Int = 0 - - // Find the max row id in the TMP_SYMBOL table - val tmpSymbolCountRS = connection.createStatement().executeQuery(NbRowsQuery) - - if (tmpSymbolCountRS.next()) { - val tmpSymbolCount = tmpSymbolCountRS.getInt("symbol_count") - - // So we can display progress - val nbPages = Math.max(tmpSymbolCount / readBatchSize, 1) - - val paginator = new QueryPaginator(tmpSymbolPaginatedStatement(connection), readBatchSize, tmpSymbolCount) - val metadataInsertStatement = MetadataStatement.makeStatement(connection) - - // Loop over pages - paginator.zipWithIndex foreach { - case (resultBatch, page) => - // Loop over rows in page - new ResultSetIterator(resultBatch).zipWithIndex foreach { - case (row, idx) => - insertsCounter += migrateRow(connection, metadataInsertStatement, row, idx) - // insertsCounter can actually be bigger than writeBatchSize as wdlValues are processed atomically, so this is a best effort - if (insertsCounter >= writeBatchSize) { - metadataInsertStatement.executeBatch() - connection.commit() - insertsCounter = 0 - } - } - - resultBatch.close() - - val progress = Math.min((page + 1) * 100 / nbPages, 100) - logger.info(s"[${getClass.getSimpleName}] $progress%") - } + """.stripMargin - if (insertsCounter != 0) { - metadataInsertStatement.executeBatch() - connection.commit() - } - } else { - throw new CustomChangeException("Could not find max value of symbol id for pagination") - } - } + override val migrateBatchQuery = MetadataStatement.InsertSql /** * Migrate a row to the metadata table */ - protected def migrateRow(connection: JdbcConnection, statement: PreparedStatement, row: ResultSet, idx: Int): Int = { + override def migrateBatchRow(row: ResultSet, statement: PreparedStatement): Int = { // Try to coerce the value to a WdlValue val value = for { wdlType <- Try(WdlType.fromWdlString(row.getString("WDL_TYPE"))) @@ -147,7 +61,7 @@ trait SymbolTableMigration extends CustomTaskChange { value 
match { case Success(wdlValue) => - processSymbol(statement, idx, workflowUuid, symbolName, symbolScope, symbolIndex, symbolAttempt, wdlValue) + processSymbol(statement, workflowUuid, symbolName, symbolScope, symbolIndex, symbolAttempt, wdlValue) case Failure(f) => logger.error( s"""Could not parse symbol of type ${row.getString("WDL_TYPE")} @@ -157,7 +71,6 @@ trait SymbolTableMigration extends CustomTaskChange { } def processSymbol(statement: PreparedStatement, - idx: Int, workflowUuid: String, symbolName: String, symbolScope: String, @@ -165,12 +78,6 @@ trait SymbolTableMigration extends CustomTaskChange { symbolAttempt: Option[Int], wdlValue: WdlValue): Int - override def setUp(): Unit = () - - override def validate(database: Database): ValidationErrors = new ValidationErrors - - override def setFileOpener(resourceAccessor: ResourceAccessor): Unit = {} - /** * Add all necessary statements to the batch for the provided WdlValue. */ diff --git a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/WorkflowOutputSymbolTableMigration.scala b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/WorkflowOutputSymbolTableMigration.scala index f188fc0cd..269d58a24 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/WorkflowOutputSymbolTableMigration.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/metadata/table/symbol/WorkflowOutputSymbolTableMigration.scala @@ -7,7 +7,6 @@ import wdl4s.values._ class WorkflowOutputSymbolTableMigration extends SymbolTableMigration { override def processSymbol(statement: PreparedStatement, - rowIndex: Int, workflowUuid: String, symbolName: String, symbolScope: String, diff --git a/database/migration/src/main/scala/cromwell/database/migration/restart/table/RenameWorkflowOptionKeysMigration.scala 
b/database/migration/src/main/scala/cromwell/database/migration/restart/table/RenameWorkflowOptionKeysMigration.scala index 3852750ce..c0610ae36 100644 --- a/database/migration/src/main/scala/cromwell/database/migration/restart/table/RenameWorkflowOptionKeysMigration.scala +++ b/database/migration/src/main/scala/cromwell/database/migration/restart/table/RenameWorkflowOptionKeysMigration.scala @@ -1,19 +1,11 @@ package cromwell.database.migration.restart.table +import cromwell.database.migration.workflowoptions.WorkflowOptionsRenaming._ import cromwell.database.migration.restart.table.RenameWorkflowOptionKeysMigration._ import liquibase.database.jvm.JdbcConnection import spray.json._ - object RenameWorkflowOptionKeysMigration { - private val RenamedOptionKeys = Map( - "defaultRuntimeOptions" -> "default_runtime_attributes", - "workflowFailureMode" -> "workflow_failure_mode", - "workflow_log_dir" -> "final_workflow_log_dir", - "outputs_path" -> "final_workflow_outputs_dir", - "call_logs_dir" -> "final_call_logs_dir" - ) - private val QueryWorkflowStore = " SELECT WORKFLOW_STORE_ID, WORKFLOW_OPTIONS FROM WORKFLOW_STORE " private val UpdateWorkflowStore = " UPDATE WORKFLOW_STORE SET WORKFLOW_OPTIONS = ? WHERE WORKFLOW_STORE_ID = ? 
" @@ -25,14 +17,6 @@ class RenameWorkflowOptionKeysMigration extends AbstractRestartMigration { override protected def description: String = "Workflow option renaming" override protected def doMigration(connection: JdbcConnection): Unit = { - - def renameOptionKeys(field: JsField): JsField = { - field match { - case (oldName, value) if RenamedOptionKeys.contains(oldName) => RenamedOptionKeys(oldName) -> value - case noop => noop - } - } - val query = connection.createStatement() lazy val insert = connection.prepareStatement(UpdateWorkflowStore) query.execute(QueryWorkflowStore) diff --git a/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/ClearMetadataEntryWorkflowOptions.scala b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/ClearMetadataEntryWorkflowOptions.scala new file mode 100644 index 000000000..4d9263491 --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/ClearMetadataEntryWorkflowOptions.scala @@ -0,0 +1,15 @@ +package cromwell.database.migration.workflowoptions + +import cromwell.core.WorkflowOptions + +/** + * Clear the values from encrypted keys in METADATA_ENTRY. 
+ */ +class ClearMetadataEntryWorkflowOptions extends WorkflowOptionsChange { + override val tableName = "METADATA_ENTRY" + override val primaryKeyColumn = "METADATA_JOURNAL_ID" + override val workflowOptionsColumn = "METADATA_VALUE" + override val additionalReadBatchFilters = "AND METADATA_KEY = 'submittedFiles:options'" + + override def migrateWorkflowOptions(workflowOptions: WorkflowOptions) = workflowOptions.clearEncryptedValues +} diff --git a/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/EncryptWorkflowStoreEntryWorkflowOptions.scala b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/EncryptWorkflowStoreEntryWorkflowOptions.scala new file mode 100644 index 000000000..9c3cc7490 --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/EncryptWorkflowStoreEntryWorkflowOptions.scala @@ -0,0 +1,14 @@ +package cromwell.database.migration.workflowoptions + +import cromwell.core.WorkflowOptions + +/** + * Encrypt the values for encrypted keys in WORKFLOW_STORE_ENTRY. 
+ */ +class EncryptWorkflowStoreEntryWorkflowOptions extends WorkflowOptionsChange { + override val tableName = "WORKFLOW_STORE_ENTRY" + override val primaryKeyColumn = "WORKFLOW_STORE_ENTRY_ID" + override val workflowOptionsColumn = "WORKFLOW_OPTIONS" + + override def migrateWorkflowOptions(workflowOptions: WorkflowOptions) = workflowOptions.asPrettyJson +} diff --git a/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/RenameWorkflowOptionsInMetadata.scala b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/RenameWorkflowOptionsInMetadata.scala new file mode 100644 index 000000000..e6842b459 --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/RenameWorkflowOptionsInMetadata.scala @@ -0,0 +1,38 @@ +package cromwell.database.migration.workflowoptions + +import java.sql.{PreparedStatement, ResultSet} + +import cromwell.database.migration.custom.BatchedTaskChange +import cromwell.database.migration.workflowoptions.WorkflowOptionsRenaming._ +import spray.json.{JsObject, _} + +class RenameWorkflowOptionsInMetadata extends BatchedTaskChange { + val tableName = "METADATA_ENTRY" + val primaryKeyColumn = "METADATA_JOURNAL_ID" + val workflowOptionsColumn = "METADATA_VALUE" + val additionalReadBatchFilters = "AND METADATA_KEY = 'submittedFiles:options'" + + override def readCountQuery = s"SELECT MAX($primaryKeyColumn) FROM $tableName;" + + override def readBatchQuery = + s"""|SELECT $primaryKeyColumn, $workflowOptionsColumn + | FROM $tableName + | WHERE $primaryKeyColumn >= ? AND $primaryKeyColumn < ? $additionalReadBatchFilters; + |""".stripMargin + + override def migrateBatchQuery = s"UPDATE $tableName SET $workflowOptionsColumn = ? 
WHERE $primaryKeyColumn = ?;" + + override def migrateBatchRow(readRow: ResultSet, migrateStatement: PreparedStatement): Int = { + val rowId = readRow.getInt(1) + + val migratedJson = readRow.getString(2).parseJson match { + case JsObject(fields) => JsObject(fields map renameOptionKeys) + case other => other + } + + migrateStatement.setString(1, migratedJson.prettyPrint) + migrateStatement.setInt(2, rowId) + migrateStatement.addBatch() + 1 + } +} diff --git a/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsChange.scala b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsChange.scala new file mode 100644 index 000000000..3c1b6b68f --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsChange.scala @@ -0,0 +1,69 @@ +package cromwell.database.migration.workflowoptions + +import java.sql.{PreparedStatement, ResultSet} + +import cromwell.core.WorkflowOptions +import cromwell.database.migration.custom.BatchedTaskChange +import liquibase.database.Database + +import scala.util.{Failure, Success} + +/** + * Edits the workflow options stored in a table. + */ +trait WorkflowOptionsChange extends BatchedTaskChange { + /** @return name of the table */ + def tableName: String + + /** @return primary key of the table */ + def primaryKeyColumn: String + + /** @return column storing the workflow options */ + def workflowOptionsColumn: String + + /** @return any additional filters to add to the where clause, starting with "AND ..." */ + def additionalReadBatchFilters: String = "" + + /** + * Takes in the workflow options and returns the edited version as a json string. 
+ * + * @param workflowOptions workflow options object + * @return edited workflow object json + */ + def migrateWorkflowOptions(workflowOptions: WorkflowOptions): String + + override def execute(database: Database): Unit = { + val configPath = "workflow-options.encrypted-fields" + if (config.hasPath(configPath) && !config.getStringList(configPath).isEmpty) { + super.execute(database) + } + } + + override def readCountQuery = s"SELECT MAX($primaryKeyColumn) FROM $tableName;" + + override def readBatchQuery = + s"""|SELECT $primaryKeyColumn, $workflowOptionsColumn + | FROM $tableName + | WHERE $primaryKeyColumn >= ? AND $primaryKeyColumn < ? $additionalReadBatchFilters; + |""".stripMargin + + override def migrateBatchQuery = s"UPDATE $tableName SET $workflowOptionsColumn = ? WHERE $primaryKeyColumn = ?;" + + override def migrateBatchRow(readRow: ResultSet, migrateStatement: PreparedStatement): Int = { + val rowId = readRow.getInt(1) + val workflowOptionsJson = readRow.getString(2) + WorkflowOptions.fromJsonString(workflowOptionsJson) match { + case Success(workflowOptions) => + val migratedJson = migrateWorkflowOptions(workflowOptions) + migrateStatement.setString(1, migratedJson) + migrateStatement.setInt(2, rowId) + migrateStatement.addBatch() + 1 + case Failure(exception) => + logger.error( + s"Unable to process $tableName pk $rowId\njson:\n$workflowOptionsJson", exception) + 0 + } + } + +} diff --git a/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsRenaming.scala b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsRenaming.scala new file mode 100644 index 000000000..d148a570f --- /dev/null +++ b/database/migration/src/main/scala/cromwell/database/migration/workflowoptions/WorkflowOptionsRenaming.scala @@ -0,0 +1,21 @@ +package cromwell.database.migration.workflowoptions + +import spray.json._ + +object WorkflowOptionsRenaming { + + private val RenamedOptionKeys = Map( + 
"defaultRuntimeOptions" -> "default_runtime_attributes", + "workflowFailureMode" -> "workflow_failure_mode", + "workflow_log_dir" -> "final_workflow_log_dir", + "outputs_path" -> "final_workflow_outputs_dir", + "call_logs_dir" -> "final_call_logs_dir" + ) + + def renameOptionKeys(field: JsField): JsField = { + field match { + case (oldName, value) if RenamedOptionKeys.contains(oldName) => RenamedOptionKeys(oldName) -> value + case noop => noop + } + } +} diff --git a/database/sql/src/main/scala/cromwell/database/slick/CallCachingSlickDatabase.scala b/database/sql/src/main/scala/cromwell/database/slick/CallCachingSlickDatabase.scala index ce40e4e6a..deb7b6d5e 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/CallCachingSlickDatabase.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/CallCachingSlickDatabase.scala @@ -5,6 +5,7 @@ import cromwell.database.sql._ import cromwell.database.sql.joins.CallCachingJoin import scala.concurrent.{ExecutionContext, Future} +import scala.language.postfixOps trait CallCachingSlickDatabase extends CallCachingSqlDatabase { this: SlickDatabase => diff --git a/database/sql/src/main/scala/cromwell/database/slick/SlickDatabase.scala b/database/sql/src/main/scala/cromwell/database/slick/SlickDatabase.scala index 80a4413fd..173de0e89 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/SlickDatabase.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/SlickDatabase.scala @@ -58,7 +58,8 @@ class SlickDatabase(override val originalDatabaseConfig: Config) extends SqlData with JobKeyValueSlickDatabase with JobStoreSlickDatabase with CallCachingSlickDatabase - with SummaryStatusSlickDatabase { + with SummaryStatusSlickDatabase + with SubWorkflowStoreSlickDatabase { override val urlKey = SlickDatabase.urlKey(originalDatabaseConfig) private val slickConfig = DatabaseConfig.forConfig[JdbcProfile]("", databaseConfig) diff --git 
a/database/sql/src/main/scala/cromwell/database/slick/SubWorkflowStoreSlickDatabase.scala b/database/sql/src/main/scala/cromwell/database/slick/SubWorkflowStoreSlickDatabase.scala new file mode 100644 index 000000000..05d216141 --- /dev/null +++ b/database/sql/src/main/scala/cromwell/database/slick/SubWorkflowStoreSlickDatabase.scala @@ -0,0 +1,67 @@ +package cromwell.database.slick + +import cats.instances.future._ +import cats.syntax.functor._ +import cromwell.database.sql.SubWorkflowStoreSqlDatabase +import cromwell.database.sql.tables.SubWorkflowStoreEntry + +import scala.concurrent.{ExecutionContext, Future} +import scala.language.postfixOps + +trait SubWorkflowStoreSlickDatabase extends SubWorkflowStoreSqlDatabase { + this: SlickDatabase => + + import dataAccess.driver.api._ + + def addSubWorkflowStoreEntry(rootWorkflowExecutionUuid: String, + parentWorkflowExecutionUuid: String, + callFullyQualifiedName: String, + jobIndex: Int, + jobAttempt: Int, + subWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Unit] = { + val action = for { + workflowStoreEntry <- dataAccess.workflowStoreEntriesForWorkflowExecutionUuid(rootWorkflowExecutionUuid).result.headOption + _ <- workflowStoreEntry match { + case Some(rootWorkflow) => + dataAccess.subWorkflowStoreEntryIdsAutoInc += + SubWorkflowStoreEntry( + rootWorkflow.workflowStoreEntryId, + parentWorkflowExecutionUuid, + callFullyQualifiedName, + jobIndex, + jobAttempt, + subWorkflowExecutionUuid + ) + case None => DBIO.failed(new IllegalArgumentException(s"Could not find root workflow with UUID $rootWorkflowExecutionUuid")) + } + } yield () + + runTransaction(action) void + } + + override def querySubWorkflowStore(parentWorkflowExecutionUuid: String, callFqn: String, jobIndex: Int, jobAttempt: Int) + (implicit ec: ExecutionContext): Future[Option[SubWorkflowStoreEntry]] = { + val action = for { + subWorkflowStoreEntryOption <- dataAccess.subWorkflowStoreEntriesForJobKey( + (parentWorkflowExecutionUuid, 
callFqn, jobIndex, jobAttempt) + ).result.headOption + } yield subWorkflowStoreEntryOption + + runTransaction(action) + } + + override def removeSubWorkflowStoreEntries(rootWorkflowExecutionUuid: String) + (implicit ec: ExecutionContext): Future[Int] = { + val action = for { + workflowStoreEntry <- dataAccess.workflowStoreEntriesForWorkflowExecutionUuid(rootWorkflowExecutionUuid).result.headOption + deleted <- workflowStoreEntry match { + case Some(rootWorkflow) => + dataAccess.subWorkflowStoreEntriesForRootWorkflowId(rootWorkflow.workflowStoreEntryId.get).delete + case None => + DBIO.successful(0) + } + } yield deleted + + runTransaction(action) + } +} diff --git a/database/sql/src/main/scala/cromwell/database/slick/tables/DataAccessComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/DataAccessComponent.scala index cdcce5a7a..b0c70abf3 100644 --- a/database/sql/src/main/scala/cromwell/database/slick/tables/DataAccessComponent.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/tables/DataAccessComponent.scala @@ -14,7 +14,8 @@ class DataAccessComponent(val driver: JdbcProfile) with MetadataEntryComponent with SummaryStatusEntryComponent with WorkflowMetadataSummaryEntryComponent - with WorkflowStoreEntryComponent { + with WorkflowStoreEntryComponent + with SubWorkflowStoreEntryComponent { import driver.api._ @@ -29,5 +30,6 @@ class DataAccessComponent(val driver: JdbcProfile) metadataEntries.schema ++ summaryStatusEntries.schema ++ workflowMetadataSummaryEntries.schema ++ - workflowStoreEntries.schema + workflowStoreEntries.schema ++ + subWorkflowStoreEntries.schema } diff --git a/database/sql/src/main/scala/cromwell/database/slick/tables/SubWorkflowStoreEntryComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/SubWorkflowStoreEntryComponent.scala new file mode 100644 index 000000000..848c60c4b --- /dev/null +++ 
b/database/sql/src/main/scala/cromwell/database/slick/tables/SubWorkflowStoreEntryComponent.scala @@ -0,0 +1,62 @@ +package cromwell.database.slick.tables + +import cromwell.database.sql.tables.SubWorkflowStoreEntry +import slick.model.ForeignKeyAction.Cascade + +trait SubWorkflowStoreEntryComponent { + + this: DriverComponent with WorkflowStoreEntryComponent => + + import driver.api._ + + class SubWorkflowStoreEntries(tag: Tag) extends Table[SubWorkflowStoreEntry](tag, "SUB_WORKFLOW_STORE_ENTRY") { + def subWorkflowStoreEntryId = column[Int]("SUB_WORKFLOW_STORE_ENTRY_ID", O.PrimaryKey, O.AutoInc) + + def rootWorkflowId = column[Int]("ROOT_WORKFLOW_ID") + + def parentWorkflowExecutionUuid = column[String]("PARENT_WORKFLOW_EXECUTION_UUID") + + def callFullyQualifiedName = column[String]("CALL_FULLY_QUALIFIED_NAME") + + def callIndex = column[Int]("CALL_INDEX") + + def callAttempt = column[Int]("CALL_ATTEMPT") + + def subWorkflowExecutionUuid = column[String]("SUB_WORKFLOW_EXECUTION_UUID") + + override def * = (rootWorkflowId.?, parentWorkflowExecutionUuid, callFullyQualifiedName, callIndex, callAttempt, subWorkflowExecutionUuid, subWorkflowStoreEntryId.?) 
<> (SubWorkflowStoreEntry.tupled, SubWorkflowStoreEntry.unapply) + + def ucSubWorkflowStoreEntryPweuCfqnJiJa = index("UC_SUB_WORKFLOW_STORE_ENTRY_PWEU_CFQN_CI_CA", + (parentWorkflowExecutionUuid, callFullyQualifiedName, callIndex, callAttempt), unique = true) + + def fkSubWorkflowStoreRootWorkflowStoreEntryId = foreignKey("FK_SUB_WORKFLOW_STORE_ROOT_WORKFLOW_ID_WORKFLOW_STORE_ENTRY_ID", + rootWorkflowId, workflowStoreEntries)(_.workflowStoreEntryId, onDelete = Cascade) + + def ixSubWorkflowStoreEntryPweu = index("IX_SUB_WORKFLOW_STORE_ENTRY_PWEU", parentWorkflowExecutionUuid, unique = false) + } + + protected val subWorkflowStoreEntries = TableQuery[SubWorkflowStoreEntries] + + val subWorkflowStoreEntryIdsAutoInc = subWorkflowStoreEntries returning subWorkflowStoreEntries.map(_.subWorkflowStoreEntryId) + + val subWorkflowStoreEntriesForRootWorkflowId = Compiled( + (rootWorkflowId: Rep[Int]) => for { + subWorkflowStoreEntry <- subWorkflowStoreEntries + if subWorkflowStoreEntry.rootWorkflowId === rootWorkflowId + } yield subWorkflowStoreEntry + ) + + /** + * Useful for finding the unique sub workflow entry for a given job key + */ + val subWorkflowStoreEntriesForJobKey = Compiled( + (parentWorkflowExecutionUuid: Rep[String], callFullyQualifiedName: Rep[String], jobIndex: Rep[Int], + jobAttempt: Rep[Int]) => + for { + subWorkflowStoreEntry <- subWorkflowStoreEntries + if subWorkflowStoreEntry.parentWorkflowExecutionUuid === parentWorkflowExecutionUuid && + subWorkflowStoreEntry.callFullyQualifiedName === callFullyQualifiedName && + subWorkflowStoreEntry.callIndex === jobIndex && subWorkflowStoreEntry.callAttempt === jobAttempt + } yield subWorkflowStoreEntry + ) +} diff --git a/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala b/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala index 4a248f803..dc896ca3d 100644 --- 
a/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala +++ b/database/sql/src/main/scala/cromwell/database/slick/tables/WorkflowStoreEntryComponent.scala @@ -1,6 +1,6 @@ package cromwell.database.slick.tables -import java.sql.{Clob, Timestamp} +import java.sql.{Blob, Clob, Timestamp} import cromwell.database.sql.tables.WorkflowStoreEntry @@ -25,8 +25,10 @@ trait WorkflowStoreEntryComponent { def submissionTime = column[Timestamp]("SUBMISSION_TIME") + def importsZipFile = column[Option[Blob]]("IMPORTS_ZIP") + override def * = (workflowExecutionUuid, workflowDefinition, workflowInputs, workflowOptions, workflowState, - submissionTime, workflowStoreEntryId.?) <> (WorkflowStoreEntry.tupled, WorkflowStoreEntry.unapply) + submissionTime, importsZipFile, workflowStoreEntryId.?) <> (WorkflowStoreEntry.tupled, WorkflowStoreEntry.unapply) def ucWorkflowStoreEntryWeu = index("UC_WORKFLOW_STORE_ENTRY_WEU", workflowExecutionUuid, unique = true) diff --git a/database/sql/src/main/scala/cromwell/database/sql/SqlDatabase.scala b/database/sql/src/main/scala/cromwell/database/sql/SqlDatabase.scala index c6c29479b..e1431de76 100644 --- a/database/sql/src/main/scala/cromwell/database/sql/SqlDatabase.scala +++ b/database/sql/src/main/scala/cromwell/database/sql/SqlDatabase.scala @@ -10,7 +10,8 @@ trait SqlDatabase extends AutoCloseable with CallCachingSqlDatabase with JobStoreSqlDatabase with MetadataSqlDatabase - with WorkflowStoreSqlDatabase { + with WorkflowStoreSqlDatabase + with SubWorkflowStoreSqlDatabase { protected val urlKey: String protected val originalDatabaseConfig: Config diff --git a/database/sql/src/main/scala/cromwell/database/sql/SubWorkflowStoreSqlDatabase.scala b/database/sql/src/main/scala/cromwell/database/sql/SubWorkflowStoreSqlDatabase.scala new file mode 100644 index 000000000..10707dc90 --- /dev/null +++ b/database/sql/src/main/scala/cromwell/database/sql/SubWorkflowStoreSqlDatabase.scala @@ -0,0 +1,21 @@ +package 
cromwell.database.sql + +import cromwell.database.sql.tables.SubWorkflowStoreEntry + +import scala.concurrent.{ExecutionContext, Future} + +trait SubWorkflowStoreSqlDatabase { + this: SqlDatabase => + + def addSubWorkflowStoreEntry(rootWorkflowExecutionUuid: String, + parentWorkflowExecutionUuid: String, + callFullyQualifiedName: String, + jobIndex: Int, + jobAttempt: Int, + subWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Unit] + + def querySubWorkflowStore(parentWorkflowExecutionUuid: String, callFqn: String, jobIndex: Int, jobAttempt: Int) + (implicit ec: ExecutionContext): Future[Option[SubWorkflowStoreEntry]] + + def removeSubWorkflowStoreEntries(parentWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Int] +} diff --git a/database/sql/src/main/scala/cromwell/database/sql/tables/SubWorkflowStoreEntry.scala b/database/sql/src/main/scala/cromwell/database/sql/tables/SubWorkflowStoreEntry.scala new file mode 100644 index 000000000..2e718179a --- /dev/null +++ b/database/sql/src/main/scala/cromwell/database/sql/tables/SubWorkflowStoreEntry.scala @@ -0,0 +1,12 @@ +package cromwell.database.sql.tables + +case class SubWorkflowStoreEntry +( + rootWorkflowId: Option[Int], + parentWorkflowExecutionUuid: String, + callFullyQualifiedName: String, + jobIndex: Int, + jobAttempt: Int, + subWorkflowExecutionUuid: String, + subWorkflowStoreEntryId: Option[Int] = None +) diff --git a/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowStoreEntry.scala b/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowStoreEntry.scala index 1154256b5..66cd80085 100644 --- a/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowStoreEntry.scala +++ b/database/sql/src/main/scala/cromwell/database/sql/tables/WorkflowStoreEntry.scala @@ -1,6 +1,6 @@ package cromwell.database.sql.tables -import java.sql.{Clob, Timestamp} +import java.sql.{Blob, Clob, Timestamp} case class WorkflowStoreEntry ( @@ -10,5 +10,6 @@ case 
class WorkflowStoreEntry workflowOptions: Clob, workflowState: String, submissionTime: Timestamp, + importsZipFile: Option[Blob], workflowStoreEntryId: Option[Int] = None ) diff --git a/engine/src/main/resources/logback.xml b/engine/src/main/resources/logback.xml deleted file mode 100644 index fa27b5dde..000000000 --- a/engine/src/main/resources/logback.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - %date %X{sourceThread} %-5level - %msg%n - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/engine/src/main/resources/swagger/cromwell.yaml b/engine/src/main/resources/swagger/cromwell.yaml index 7960bb937..e2e825f8f 100644 --- a/engine/src/main/resources/swagger/cromwell.yaml +++ b/engine/src/main/resources/swagger/cromwell.yaml @@ -93,6 +93,11 @@ paths: required: false type: file in: formData + - name: wdlDependencies + description: Workflow Options JSON + required: false + type: file + in: formData tags: - Workflows responses: @@ -134,6 +139,11 @@ paths: required: false type: file in: formData + - name: wdlDependencies + description: Workflow Options JSON + required: false + type: file + in: formData tags: - Workflows responses: @@ -386,6 +396,13 @@ paths: type: string collectionFormat: multi in: query + - name: expandSubWorkflows + description: > + When true, metadata for sub workflows will be fetched and inserted automatically in the metadata response. 
+ required: false + type: boolean + default: false + in: query tags: - Workflows responses: diff --git a/engine/src/main/resources/workflowTimings/workflowTimings.html b/engine/src/main/resources/workflowTimings/workflowTimings.html index bd9de21d6..3e1df152c 100644 --- a/engine/src/main/resources/workflowTimings/workflowTimings.html +++ b/engine/src/main/resources/workflowTimings/workflowTimings.html @@ -1,40 +1,33 @@ - - - + + + var parentWorkflow; + if (selectedRow) parentWorkflow = chartView.getValue(selectedRow, 0); + + var indexOfParentWorkflow = expandedParentWorkflows.indexOf(parentWorkflow); + + if (indexOfParentWorkflow != -1) { + // Remove the parent workflow from the list if it's in it + expandedParentWorkflows.splice(indexOfParentWorkflow, 1); + } else if (parentWorkflow && parentWorkflowNames.indexOf(parentWorkflow) != -1) { + // Add it if it's not + expandedParentWorkflows.push(parentWorkflow); + } + + var rowsToDisplay = dt.getFilteredRows([filter]); + var view = new google.visualization.DataView(dt); + view.setRows(rowsToDisplay); + return view; + } + + function hideAllSubWorkflows(dt) { + var view = new google.visualization.DataView(dt); + function filterFunction(cell, row, column, table) { + return table.getRowProperty(row, "ancestry").length != 0; + } + + view.hideRows(dt.getFilteredRows([{column: 0, test: filterFunction}])); + return view; + } + + + + -
diff --git a/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala b/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala index ab9cbceac..a738984dd 100644 --- a/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala +++ b/engine/src/main/scala/cromwell/engine/EngineFilesystems.scala @@ -1,18 +1,40 @@ package cromwell.engine -import java.nio.file.{FileSystem, FileSystems} - +import akka.actor.ActorSystem import cats.data.Validated.{Invalid, Valid} +import com.google.api.client.http.HttpResponseException import com.typesafe.config.ConfigFactory import cromwell.core.WorkflowOptions -import cromwell.engine.backend.EnhancedWorkflowOptions._ -import cromwell.filesystems.gcs.{GcsFileSystem, GcsFileSystemProvider, GoogleConfiguration} +import cromwell.core.path.{CustomRetryParams, DefaultPathBuilder, PathBuilder} +import cromwell.core.retry.SimpleExponentialBackoff +import cromwell.filesystems.gcs.{GoogleConfiguration, RetryableGcsPathBuilderFactory} import lenthall.exception.MessageAggregation import net.ceedubs.ficus.Ficus._ -import scala.concurrent.ExecutionContext +import scala.concurrent.duration._ +import scala.language.postfixOps + +case class EngineFilesystems(actorSystem: ActorSystem) { -object EngineFilesystems { + private def isFatalGcsException(t: Throwable): Boolean = t match { + case e: HttpResponseException if e.getStatusCode == 403 => true + case e: HttpResponseException if e.getStatusCode == 400 && e.getContent.contains("INVALID_ARGUMENT") => true + case _ => false + } + + private def isTransientGcsException(t: Throwable): Boolean = t match { + // Quota exceeded + case e: HttpResponseException if e.getStatusCode == 429 => true + case _ => false + } + + private val GcsRetryParams = CustomRetryParams( + timeout = Duration.Inf, + maxRetries = Option(3), + backoff = SimpleExponentialBackoff(1 seconds, 3 seconds, 1.5D), + isTransient = isTransientGcsException, + isFatal = isFatalGcsException + ) private val config = ConfigFactory.load 
private val googleConf: GoogleConfiguration = GoogleConfiguration(config) @@ -26,14 +48,11 @@ object EngineFilesystems { } } - def filesystemsForWorkflow(workflowOptions: WorkflowOptions)(implicit ec: ExecutionContext): List[FileSystem] = { - def gcsFileSystem: Option[GcsFileSystem] = { - googleAuthMode map { mode => - val storage = mode.buildStorage(workflowOptions.toGoogleAuthOptions, googleConf.applicationName) - GcsFileSystem(GcsFileSystemProvider(storage)) - } - } + private val gcsPathBuilderFactory = googleAuthMode map { mode => + RetryableGcsPathBuilderFactory(mode, customRetryParams = GcsRetryParams) + } - List(gcsFileSystem, Option(FileSystems.getDefault)).flatten + def pathBuildersForWorkflow(workflowOptions: WorkflowOptions): List[PathBuilder] = { + List(gcsPathBuilderFactory map { _.withOptions(workflowOptions)(actorSystem) }, Option(DefaultPathBuilder)).flatten } } diff --git a/engine/src/main/scala/cromwell/engine/EngineWorkflowDescriptor.scala b/engine/src/main/scala/cromwell/engine/EngineWorkflowDescriptor.scala index c493b41a6..d8aa2a44d 100644 --- a/engine/src/main/scala/cromwell/engine/EngineWorkflowDescriptor.scala +++ b/engine/src/main/scala/cromwell/engine/EngineWorkflowDescriptor.scala @@ -1,20 +1,28 @@ package cromwell.engine -import java.nio.file.FileSystem - import cromwell.backend.BackendWorkflowDescriptor import cromwell.core.WorkflowOptions.WorkflowOption import cromwell.core.callcaching.CallCachingMode +import cromwell.core.path.PathBuilder import wdl4s._ -final case class EngineWorkflowDescriptor(backendDescriptor: BackendWorkflowDescriptor, +final case class EngineWorkflowDescriptor(namespace: WdlNamespaceWithWorkflow, + backendDescriptor: BackendWorkflowDescriptor, workflowInputs: WorkflowCoercedInputs, - backendAssignments: Map[Call, String], + backendAssignments: Map[TaskCall, String], failureMode: WorkflowFailureMode, - engineFilesystems: List[FileSystem], - callCachingMode: CallCachingMode) { - def id = backendDescriptor.id - 
def namespace = backendDescriptor.workflowNamespace - def name = namespace.workflow.unqualifiedName + pathBuilders: List[PathBuilder], + callCachingMode: CallCachingMode, + parentWorkflow: Option[EngineWorkflowDescriptor] = None) { + + val rootWorkflow: EngineWorkflowDescriptor = parentWorkflow match { + case Some(parent) => parent.rootWorkflow + case None => this + } + + val id = backendDescriptor.id + lazy val workflow = backendDescriptor.workflow + lazy val name = workflow.unqualifiedName + val inputs = backendDescriptor.inputs def getWorkflowOption(key: WorkflowOption) = backendDescriptor.getWorkflowOption(key) } diff --git a/engine/src/main/scala/cromwell/engine/WdlFunctions.scala b/engine/src/main/scala/cromwell/engine/WdlFunctions.scala index 3cc8ee1ca..9fe346c50 100644 --- a/engine/src/main/scala/cromwell/engine/WdlFunctions.scala +++ b/engine/src/main/scala/cromwell/engine/WdlFunctions.scala @@ -1,17 +1,13 @@ package cromwell.engine -import java.nio.file.FileSystem - -import cromwell.backend.wdl.{PureFunctions, ReadLikeFunctions} -import wdl4s.expression.WdlStandardLibraryFunctions +import cromwell.backend.wdl.ReadLikeFunctions +import wdl4s.expression.PureStandardLibraryFunctionsLike +import cromwell.core.path.PathBuilder import wdl4s.values.{WdlFile, WdlValue} import scala.util.{Failure, Try} -class WdlFunctions(val fileSystems: List[FileSystem]) extends WdlStandardLibraryFunctions with ReadLikeFunctions with PureFunctions { - /** - * Ordered list of filesystems to be used to execute WDL functions needing IO. 
- */ +class WdlFunctions(val pathBuilders: List[PathBuilder]) extends PureStandardLibraryFunctionsLike with ReadLikeFunctions { private def fail(name: String) = Failure(new NotImplementedError(s"$name() not supported at the workflow level yet")) override def write_json(params: Seq[Try[WdlValue]]): Try[WdlFile] = fail("write_json") diff --git a/engine/src/main/scala/cromwell/engine/backend/EnhancedWorkflowOptions.scala b/engine/src/main/scala/cromwell/engine/backend/EnhancedWorkflowOptions.scala deleted file mode 100644 index e2043cb65..000000000 --- a/engine/src/main/scala/cromwell/engine/backend/EnhancedWorkflowOptions.scala +++ /dev/null @@ -1,16 +0,0 @@ -package cromwell.engine.backend - -import cromwell.core.WorkflowOptions -import cromwell.filesystems.gcs.GoogleAuthMode -import cromwell.filesystems.gcs.GoogleAuthMode.GoogleAuthOptions - -import scala.util.Try - -object EnhancedWorkflowOptions { - - implicit class GoogleAuthWorkflowOptions(val workflowOptions: WorkflowOptions) extends AnyVal { - def toGoogleAuthOptions: GoogleAuthMode.GoogleAuthOptions = new GoogleAuthOptions { - override def get(key: String): Try[String] = workflowOptions.get(key) - } - } -} diff --git a/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala b/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala index 8abb0874c..8a72bc414 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/SingleWorkflowRunnerActor.scala @@ -9,13 +9,15 @@ import better.files._ import cats.instances.try_._ import cats.syntax.functor._ import cromwell.core.retry.SimpleExponentialBackoff -import cromwell.core.{ExecutionStore => _, _} +import cromwell.core._ import cromwell.engine.workflow.SingleWorkflowRunnerActor._ import cromwell.engine.workflow.WorkflowManagerActor.RetrieveNewWorkflows -import cromwell.engine.workflow.workflowstore.WorkflowStoreActor +import 
cromwell.engine.workflow.workflowstore.{InMemoryWorkflowStore, WorkflowStoreActor} import cromwell.engine.workflow.workflowstore.WorkflowStoreActor.SubmitWorkflow +import cromwell.jobstore.EmptyJobStoreActor import cromwell.server.CromwellRootActor import cromwell.services.metadata.MetadataService.{GetSingleWorkflowMetadataAction, GetStatus, WorkflowOutputs} +import cromwell.subworkflowstore.EmptySubWorkflowStoreActor import cromwell.webservice.PerRequest.RequestComplete import cromwell.webservice.metadata.MetadataBuilderActor import spray.http.StatusCodes @@ -26,160 +28,151 @@ import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.{Failure, Try} -object SingleWorkflowRunnerActor { - def props(source: WorkflowSourceFiles, metadataOutputFile: Option[Path]): Props = { - Props(new SingleWorkflowRunnerActor(source, metadataOutputFile)) - } - - sealed trait RunnerMessage - // The message to actually run the workflow is made explicit so the non-actor Main can `ask` this actor to do the - // running and collect a result. 
- case object RunWorkflow extends RunnerMessage - private case object IssuePollRequest extends RunnerMessage - private case object IssueReply extends RunnerMessage - - sealed trait RunnerState - case object NotStarted extends RunnerState - case object RunningWorkflow extends RunnerState - case object RequestingOutputs extends RunnerState - case object RequestingMetadata extends RunnerState - case object Done extends RunnerState - - final case class RunnerData(replyTo: Option[ActorRef] = None, - terminalState: Option[WorkflowState] = None, - id: Option[WorkflowId] = None, - failures: Seq[Throwable] = Seq.empty) { - - def addFailure(message: String): RunnerData = addFailure(new RuntimeException(message)) - - def addFailure(e: Throwable): RunnerData = this.copy(failures = e +: failures) - } - - implicit class EnhancedJsObject(val jsObject: JsObject) extends AnyVal { - def state: WorkflowState = WorkflowState.fromString(jsObject.fields("status").asInstanceOf[JsString].value) - } - - private val Tag = "SingleWorkflowRunnerActor" -} - /** * Designed explicitly for the use case of the 'run' functionality in Main. This Actor will start a workflow, - * print out the outputs when complete and then shut down the actor system. Note that multiple aspects of this - * are sub-optimal for future use cases where one might want a single workflow being run. + * print out the outputs when complete and reply with a result. 
*/ -class SingleWorkflowRunnerActor(source: WorkflowSourceFiles, metadataOutputPath: Option[Path]) - extends CromwellRootActor with LoggingFSM[RunnerState, RunnerData] { +class SingleWorkflowRunnerActor(source: WorkflowSourceFilesCollection, metadataOutputPath: Option[Path]) + extends CromwellRootActor with LoggingFSM[RunnerState, SwraData] { + + override val serverMode = false import SingleWorkflowRunnerActor._ private val backoff = SimpleExponentialBackoff(1 second, 1 minute, 1.2) - startWith(NotStarted, RunnerData()) - - private def requestMetadata: State = { - val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"MetadataRequest-Workflow-${stateData.id.get}") - metadataBuilder ! GetSingleWorkflowMetadataAction(stateData.id.get, None, None) - goto (RequestingMetadata) - } - - private def schedulePollRequest(): Unit = { - // -Ywarn-value-discard should stash Cancellable to cancel - context.system.scheduler.scheduleOnce(backoff.backoffMillis.millis, self, IssuePollRequest) - () - } - - private def requestStatus(): Unit = { - // This requests status via the metadata service rather than instituting an FSM watch on the underlying workflow actor. - // Cromwell's eventual consistency means it isn't safe to use an FSM transition to a terminal state as the signal for - // when outputs or metadata have stabilized. - val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"StatusRequest-Workflow-${stateData.id.get}-request-${UUID.randomUUID()}") - metadataBuilder ! GetStatus(stateData.id.get) - } + override val abortJobsOnTerminate = true + override lazy val workflowStore = new InMemoryWorkflowStore() + override lazy val jobStoreActor = context.actorOf(EmptyJobStoreActor.props) + override lazy val subWorkflowStoreActor = context.actorOf(EmptySubWorkflowStoreActor.props) - private def issueReply: State = { - self ! 
IssueReply - goto (Done) - } + startWith(NotStarted, EmptySwraData) when (NotStarted) { - case Event(RunWorkflow, data) => + case Event(RunWorkflow, EmptySwraData) => log.info(s"$Tag: Submitting workflow") workflowStoreActor ! SubmitWorkflow(source) - goto (RunningWorkflow) using data.copy(replyTo = Option(sender())) + goto(SubmittedWorkflow) using SubmittedSwraData(sender()) } - when (RunningWorkflow) { - case Event(WorkflowStoreActor.WorkflowSubmittedToStore(id), data) => + when (SubmittedWorkflow) { + case Event(WorkflowStoreActor.WorkflowSubmittedToStore(id), SubmittedSwraData(replyTo)) => log.info(s"$Tag: Workflow submitted UUID($id)") // Since we only have a single workflow, force the WorkflowManagerActor's hand in case the polling rate is long workflowManagerActor ! RetrieveNewWorkflows schedulePollRequest() - stay() using data.copy(id = Option(id)) - case Event(IssuePollRequest, data) => - data.id match { - case None => schedulePollRequest() - case _ => requestStatus() - } + goto(RunningWorkflow) using RunningSwraData(replyTo, id) + } + + when (RunningWorkflow) { + case Event(IssuePollRequest, RunningSwraData(_, id)) => + requestStatus(id) stay() - case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), data) if !jsObject.state.isTerminal => + case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), RunningSwraData(_, _)) if !jsObject.state.isTerminal => schedulePollRequest() stay() - case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), data) if jsObject.state == WorkflowSucceeded => + case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowSucceeded => val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), - s"CompleteRequest-Workflow-${stateData.id.get}-request-${UUID.randomUUID()}") - metadataBuilder ! 
WorkflowOutputs(data.id.get) - goto(RequestingOutputs) using data.copy(terminalState = Option(WorkflowSucceeded)) - case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), data) if jsObject.state == WorkflowFailed => - val updatedData = data.copy(terminalState = Option(WorkflowFailed)).addFailure(s"Workflow ${data.id.get} transitioned to state Failed") - // If there's an output path specified then request metadata, otherwise issue a reply to the original sender. - val nextState = if (metadataOutputPath.isDefined) requestMetadata else issueReply - nextState using updatedData + s"CompleteRequest-Workflow-$id-request-${UUID.randomUUID()}") + metadataBuilder ! WorkflowOutputs(id) + log.info(s"$Tag workflow finished with status '$WorkflowSucceeded'.") + goto(RequestingOutputs) using SucceededSwraData(replyTo, id) + case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowFailed => + log.info(s"$Tag workflow finished with status '$WorkflowFailed'.") + requestMetadataOrIssueReply(FailedSwraData(replyTo, id, new RuntimeException(s"Workflow $id transitioned to state $WorkflowFailed"))) + case Event(RequestComplete((StatusCodes.OK, jsObject: JsObject)), RunningSwraData(replyTo, id)) if jsObject.state == WorkflowAborted => + log.info(s"$Tag workflow finished with status '$WorkflowAborted'.") + requestMetadataOrIssueReply(AbortedSwraData(replyTo, id)) } when (RequestingOutputs) { - case Event(RequestComplete((StatusCodes.OK, outputs: JsObject)), _) => + case Event(RequestComplete((StatusCodes.OK, outputs: JsObject)), data: TerminalSwraData) => outputOutputs(outputs) - if (metadataOutputPath.isDefined) requestMetadata else issueReply + requestMetadataOrIssueReply(data) } when (RequestingMetadata) { - case Event(RequestComplete((StatusCodes.OK, metadata: JsObject)), _) => + case Event(RequestComplete((StatusCodes.OK, metadata: JsObject)), data: TerminalSwraData) => outputMetadata(metadata) - issueReply 
- } - - when (Done) { - case Event(IssueReply, data) => - data.terminalState foreach { state => log.info(s"$Tag workflow finished with status '$state'.") } - data.failures foreach { e => log.error(e, e.getMessage) } - - val message: Any = data.terminalState collect { case WorkflowSucceeded => () } getOrElse Status.Failure(data.failures.head) - data.replyTo foreach { _ ! message } - stay() + issueReply(data) } onTransition { case NotStarted -> RunningWorkflow => schedulePollRequest() } - private def failAndFinish(e: Throwable): State = { - log.error(e, s"$Tag received Failure message: ${e.getMessage}") - issueReply using stateData.addFailure(e) - } - whenUnhandled { // Handle failures for all failure responses generically. - case Event(r: WorkflowStoreActor.WorkflowAbortFailed, data) => failAndFinish(r.reason) - case Event(Failure(e), data) => failAndFinish(e) - case Event(Status.Failure(e), data) => failAndFinish(e) - case Event(RequestComplete((_, snap)), _) => failAndFinish(new RuntimeException(s"Unexpected API completion message: $snap")) + case Event(r: WorkflowStoreActor.WorkflowAbortFailed, data) => failAndFinish(r.reason, data) + case Event(Failure(e), data) => failAndFinish(e, data) + case Event(Status.Failure(e), data) => failAndFinish(e, data) + case Event(RequestComplete((_, snap)), data) => failAndFinish(new RuntimeException(s"Unexpected API completion message: $snap"), data) case Event((CurrentState(_, _) | Transition(_, _, _)), _) => // ignore uninteresting current state and transition messages stay() - case Event(m, _) => - log.warning(s"$Tag: received unexpected message: $m") + case Event(m, d) => + log.warning(s"$Tag: received unexpected message: $m in state ${d.getClass.getSimpleName}") stay() } + private def requestMetadataOrIssueReply(newData: TerminalSwraData) = if (metadataOutputPath.isDefined) requestMetadata(newData) else issueReply(newData) + + private def requestMetadata(newData: TerminalSwraData): State = { + val metadataBuilder = 
context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"MetadataRequest-Workflow-${newData.id}") + metadataBuilder ! GetSingleWorkflowMetadataAction(newData.id, None, None, expandSubWorkflows = true) + goto (RequestingMetadata) using newData + } + + private def schedulePollRequest(): Unit = { + // -Ywarn-value-discard should stash Cancellable to cancel + context.system.scheduler.scheduleOnce(backoff.backoffMillis.millis, self, IssuePollRequest) + () + } + + private def requestStatus(id: WorkflowId): Unit = { + // This requests status via the metadata service rather than instituting an FSM watch on the underlying workflow actor. + // Cromwell's eventual consistency means it isn't safe to use an FSM transition to a terminal state as the signal for + // when outputs or metadata have stabilized. + val metadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), s"StatusRequest-Workflow-$id-request-${UUID.randomUUID()}") + metadataBuilder ! GetStatus(id) + } + + private def issueSuccessReply(replyTo: ActorRef): State = { + replyTo.tell(msg = (), sender = self) // Because replyTo ! () is the parameterless call replyTo.!() + context.stop(self) + stay() + } + + private def issueFailureReply(replyTo: ActorRef, e: Throwable): State = { + replyTo ! Status.Failure(e) + context.stop(self) + stay() + } + + private def issueReply(data: TerminalSwraData) = { + data match { + case s: SucceededSwraData => issueSuccessReply(s.replyTo) + case f: FailedSwraData => issueFailureReply(f.replyTo, f.failure) + case a: AbortedSwraData => issueSuccessReply(a.replyTo) + + } + } + + private def failAndFinish(e: Throwable, data: SwraData): State = { + log.error(e, s"$Tag received Failure message: ${e.getMessage}") + data match { + case EmptySwraData => + log.error(e, "Cannot issue response. 
Need a 'replyTo' address to issue the exception response") + context.stop(self) + stay() + case SubmittedSwraData(replyTo) => + issueFailureReply(replyTo, e) + case RunningSwraData(replyTo, _) => + issueFailureReply(replyTo, e) + case c: TerminalSwraData => + issueFailureReply(c.replyTo, e) + } + } + /** * Outputs the outputs to stdout, and then requests the metadata. */ @@ -199,3 +192,44 @@ class SingleWorkflowRunnerActor(source: WorkflowSourceFiles, metadataOutputPath: } void } } + +object SingleWorkflowRunnerActor { + def props(source: WorkflowSourceFilesCollection, metadataOutputFile: Option[Path]): Props = { + Props(new SingleWorkflowRunnerActor(source, metadataOutputFile)) + } + + sealed trait RunnerMessage + // The message to actually run the workflow is made explicit so the non-actor Main can `ask` this actor to do the + // running and collect a result. + case object RunWorkflow extends RunnerMessage + private case object IssuePollRequest extends RunnerMessage + + sealed trait RunnerState + case object NotStarted extends RunnerState + case object SubmittedWorkflow extends RunnerState + case object RunningWorkflow extends RunnerState + case object RequestingOutputs extends RunnerState + case object RequestingMetadata extends RunnerState + + sealed trait SwraData + case object EmptySwraData extends SwraData + final case class SubmittedSwraData(replyTo: ActorRef) extends SwraData + final case class RunningSwraData(replyTo: ActorRef, id: WorkflowId) extends SwraData + + sealed trait TerminalSwraData extends SwraData { def replyTo: ActorRef; def terminalState: WorkflowState; def id: WorkflowId } + final case class SucceededSwraData(replyTo: ActorRef, + id: WorkflowId) extends TerminalSwraData { override val terminalState = WorkflowSucceeded } + + final case class FailedSwraData(replyTo: ActorRef, + id: WorkflowId, + failure: Throwable) extends TerminalSwraData { override val terminalState = WorkflowFailed } + + final case class AbortedSwraData(replyTo: ActorRef, 
+ id: WorkflowId) extends TerminalSwraData { override val terminalState = WorkflowAborted } + + implicit class EnhancedJsObject(val jsObject: JsObject) extends AnyVal { + def state: WorkflowState = WorkflowState.fromString(jsObject.fields("status").asInstanceOf[JsString].value) + } + + private val Tag = "SingleWorkflowRunnerActor" +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala index 1035872d3..6d2a2ff40 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala @@ -1,15 +1,14 @@ package cromwell.engine.workflow -import java.time.OffsetDateTime - import akka.actor.SupervisorStrategy.Escalate import akka.actor._ import com.typesafe.config.Config -import cromwell.backend.AllBackendInitializationData +import cromwell.backend._ import cromwell.core.Dispatcher.EngineDispatcher import cromwell.core.WorkflowOptions.FinalWorkflowLogDir import cromwell.core._ import cromwell.core.logging.{WorkflowLogger, WorkflowLogging} +import cromwell.core.path.PathFactory import cromwell.engine._ import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.WorkflowActor._ @@ -17,13 +16,12 @@ import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{Ma import cromwell.engine.workflow.lifecycle.WorkflowFinalizationActor.{StartFinalizationCommand, WorkflowFinalizationFailedResponse, WorkflowFinalizationSucceededResponse} import cromwell.engine.workflow.lifecycle.WorkflowInitializationActor.{StartInitializationCommand, WorkflowInitializationFailedResponse, WorkflowInitializationSucceededResponse} import cromwell.engine.workflow.lifecycle._ -import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor +import cromwell.engine.workflow.lifecycle.execution.{WorkflowExecutionActor, WorkflowMetadataHelper} import 
cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ import cromwell.services.metadata.MetadataService._ -import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} +import cromwell.subworkflowstore.SubWorkflowStoreActor.WorkflowComplete import cromwell.webservice.EngineStatsActor - -import scala.util.Random +import wdl4s.{LocallyQualifiedName => _} object WorkflowActor { @@ -135,16 +133,18 @@ object WorkflowActor { def props(workflowId: WorkflowId, startMode: StartMode, - wdlSource: WorkflowSourceFiles, + wdlSource: WorkflowSourceFilesCollection, conf: Config, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, jobTokenDispenserActor: ActorRef, - backendSingletonCollection: BackendSingletonCollection): Props = { + backendSingletonCollection: BackendSingletonCollection, + serverMode: Boolean): Props = { Props(new WorkflowActor(workflowId, startMode, wdlSource, conf, serviceRegistryActor, workflowLogCopyRouter, - jobStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection)).withDispatcher(EngineDispatcher) + jobStoreActor, subWorkflowStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection, serverMode)).withDispatcher(EngineDispatcher) } } @@ -153,30 +153,32 @@ object WorkflowActor { */ class WorkflowActor(val workflowId: WorkflowId, startMode: StartMode, - workflowSources: WorkflowSourceFiles, + workflowSources: WorkflowSourceFilesCollection, conf: Config, - serviceRegistryActor: ActorRef, + override val serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, jobTokenDispenserActor: ActorRef, - backendSingletonCollection: BackendSingletonCollection) - extends LoggingFSM[WorkflowActorState, WorkflowActorData] with WorkflowLogging with PathFactory { + backendSingletonCollection: 
BackendSingletonCollection, + serverMode: Boolean) + extends LoggingFSM[WorkflowActorState, WorkflowActorData] with WorkflowLogging with WorkflowMetadataHelper { implicit val ec = context.dispatcher + override val workflowIdForLogging = workflowId startWith(WorkflowUnstartedState, WorkflowActorData.empty) - pushCurrentStateToMetadataService(WorkflowUnstartedState.workflowState) - + pushCurrentStateToMetadataService(workflowId, WorkflowUnstartedState.workflowState) + override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() { case _ => Escalate } when(WorkflowUnstartedState) { case Event(StartWorkflowCommand, _) => - val actor = context.actorOf(MaterializeWorkflowDescriptorActor.props(serviceRegistryActor, workflowId), + val actor = context.actorOf(MaterializeWorkflowDescriptorActor.props(serviceRegistryActor, workflowId, importLocalFilesystem = !serverMode), "MaterializeWorkflowDescriptorActor") - val startEvent = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.StartTime), MetadataValue(OffsetDateTime.now.toString)) - serviceRegistryActor ! PutMetadataAction(startEvent) + pushWorkflowStart(workflowId) actor ! 
MaterializeWorkflowDescriptorCommand(workflowSources, conf) goto(MaterializingWorkflowDescriptorState) using stateData.copy(currentLifecycleStateActor = Option(actor)) @@ -203,10 +205,11 @@ class WorkflowActor(val workflowId: WorkflowId, case RestartExistingWorkflow => true } - val executionActor = context.actorOf(WorkflowExecutionActor.props(workflowId, + val executionActor = context.actorOf(WorkflowExecutionActor.props( workflowDescriptor, serviceRegistryActor, jobStoreActor, + subWorkflowStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection, @@ -217,16 +220,16 @@ class WorkflowActor(val workflowId: WorkflowId, goto(ExecutingWorkflowState) using data.copy(currentLifecycleStateActor = Option(executionActor), initializationData = initializationData) case Event(WorkflowInitializationFailedResponse(reason), data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - finalizeWorkflow(data, workflowDescriptor, ExecutionStore.empty, OutputStore.empty, Option(reason.toList)) + finalizeWorkflow(data, workflowDescriptor, Map.empty, Map.empty, Option(reason.toList)) } when(ExecutingWorkflowState) { - case Event(WorkflowExecutionSucceededResponse(executionStore, outputStore), + case Event(WorkflowExecutionSucceededResponse(jobKeys, outputs), data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - finalizeWorkflow(data, workflowDescriptor, executionStore, outputStore, None) - case Event(WorkflowExecutionFailedResponse(executionStore, outputStore, failures), + finalizeWorkflow(data, workflowDescriptor, jobKeys, outputs, None) + case Event(WorkflowExecutionFailedResponse(jobKeys, failures), data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - finalizeWorkflow(data, workflowDescriptor, executionStore, outputStore, Option(failures.toList)) + finalizeWorkflow(data, workflowDescriptor, jobKeys, Map.empty, Option(List(failures))) case Event(msg @ EngineStatsActor.JobCountQuery, data) => data.currentLifecycleStateActor match { 
case Some(a) => a forward msg @@ -245,7 +248,7 @@ class WorkflowActor(val workflowId: WorkflowId, when(WorkflowAbortingState) { case Event(x: EngineLifecycleStateCompleteResponse, data @ WorkflowActorData(_, Some(workflowDescriptor), _, _)) => - finalizeWorkflow(data, workflowDescriptor, ExecutionStore.empty, OutputStore.empty, failures = None) + finalizeWorkflow(data, workflowDescriptor, Map.empty, Map.empty, failures = None) case _ => stay() } @@ -278,22 +281,19 @@ class WorkflowActor(val workflowId: WorkflowId, // Only publish "External" state to metadata service // workflowState maps a state to an "external" state (e.g all states extending WorkflowActorRunningState map to WorkflowRunning) if (fromState.workflowState != toState.workflowState) { - pushCurrentStateToMetadataService(toState.workflowState) + pushCurrentStateToMetadataService(workflowId, toState.workflowState) } } onTransition { case (oldState, terminalState: WorkflowActorTerminalState) => workflowLogger.debug(s"transition from {} to {}. Stopping self.", arg1 = oldState, arg2 = terminalState) - // Add the end time of the workflow in the MetadataService - val now = OffsetDateTime.now - val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.EndTime), MetadataValue(now)) - serviceRegistryActor ! PutMetadataAction(metadataEventMsg) + pushWorkflowEnd(workflowId) + subWorkflowStoreActor ! WorkflowComplete(workflowId) terminalState match { case WorkflowFailedState => val failures = nextStateData.lastStateReached.failures.getOrElse(List.empty) - val failureEvents = failures flatMap { r => throwableToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Failures}[${Random.nextInt(Int.MaxValue)}]"), r) } - serviceRegistryActor ! PutMetadataAction(failureEvents) + pushWorkflowFailures(workflowId, failures) context.parent ! 
WorkflowFailedResponse(workflowId, nextStateData.lastStateReached.state, failures) case _ => // The WMA is watching state transitions and needs no further info } @@ -303,7 +303,7 @@ class WorkflowActor(val workflowId: WorkflowId, stateData.workflowDescriptor foreach { wd => wd.getWorkflowOption(FinalWorkflowLogDir) match { case Some(destinationDir) => - workflowLogCopyRouter ! CopyWorkflowLogsActor.Copy(wd.id, buildPath(destinationDir, wd.engineFilesystems)) + workflowLogCopyRouter ! CopyWorkflowLogsActor.Copy(wd.id, PathFactory.buildPath(destinationDir, wd.pathBuilders)) case None if WorkflowLogger.isTemporary => workflowLogger.deleteLogFile() case _ => } @@ -323,24 +323,17 @@ class WorkflowActor(val workflowId: WorkflowId, goto(finalState) using data.copy(currentLifecycleStateActor = None) } - private[workflow] def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, executionStore: ExecutionStore, outputStore: OutputStore) = { - context.actorOf(WorkflowFinalizationActor.props(workflowId, workflowDescriptor, executionStore, outputStore, stateData.initializationData), name = s"WorkflowFinalizationActor") + private[workflow] def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs) = { + context.actorOf(WorkflowFinalizationActor.props(workflowId, workflowDescriptor, jobExecutionMap, workflowOutputs, stateData.initializationData), name = s"WorkflowFinalizationActor") } /** * Run finalization actor and transition to FinalizingWorkflowState. 
*/ private def finalizeWorkflow(data: WorkflowActorData, workflowDescriptor: EngineWorkflowDescriptor, - executionStore: ExecutionStore, outputStore: OutputStore, + jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, failures: Option[List[Throwable]]) = { - val finalizationActor = makeFinalizationActor(workflowDescriptor, executionStore, outputStore) + val finalizationActor = makeFinalizationActor(workflowDescriptor, jobExecutionMap, workflowOutputs) finalizationActor ! StartFinalizationCommand goto(FinalizingWorkflowState) using data.copy(lastStateReached = StateCheckpoint(stateName, failures)) } - - // Update the current State of the Workflow (corresponding to the FSM state) in the Metadata service - private def pushCurrentStateToMetadataService(workflowState: WorkflowState): Unit = { - val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.Status), - MetadataValue(workflowState)) - serviceRegistryActor ! PutMetadataAction(metadataEventMsg) - } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala index 08bade654..a19772cd8 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala @@ -1,6 +1,5 @@ package cromwell.engine.workflow - import akka.actor.FSM.{CurrentState, SubscribeTransitionCallBack, Transition} import akka.actor._ import akka.event.Logging @@ -16,8 +15,10 @@ import cromwell.jobstore.JobStoreActor.{JobStoreWriteFailure, JobStoreWriteSucce import cromwell.services.metadata.MetadataService._ import cromwell.webservice.EngineStatsActor import net.ceedubs.ficus.Ficus._ +import org.apache.commons.lang3.exception.ExceptionUtils + import scala.concurrent.duration._ -import scala.concurrent.{Await, Promise} +import scala.sys.ShutdownHookThread object WorkflowManagerActor { val 
DefaultMaxWorkflowsToRun = 5000 @@ -42,12 +43,16 @@ object WorkflowManagerActor { serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, jobTokenDispenserActor: ActorRef, - backendSingletonCollection: BackendSingletonCollection): Props = { - Props(new WorkflowManagerActor( - workflowStore, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection) - ).withDispatcher(EngineDispatcher) + backendSingletonCollection: BackendSingletonCollection, + abortJobsOnTerminate: Boolean, + serverMode: Boolean): Props = { + val params = WorkflowManagerActorParams(ConfigFactory.load, workflowStore, serviceRegistryActor, + workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection, + abortJobsOnTerminate, serverMode) + Props(new WorkflowManagerActor(params)).withDispatcher(EngineDispatcher) } /** @@ -78,24 +83,22 @@ object WorkflowManagerActor { } } -class WorkflowManagerActor(config: Config, - val workflowStore: ActorRef, - val serviceRegistryActor: ActorRef, - val workflowLogCopyRouter: ActorRef, - val jobStoreActor: ActorRef, - val callCacheReadActor: ActorRef, - val jobTokenDispenserActor: ActorRef, - val backendSingletonCollection: BackendSingletonCollection) +case class WorkflowManagerActorParams(config: Config, + workflowStore: ActorRef, + serviceRegistryActor: ActorRef, + workflowLogCopyRouter: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + abortJobsOnTerminate: Boolean, + serverMode: Boolean) + +class WorkflowManagerActor(params: WorkflowManagerActorParams) extends LoggingFSM[WorkflowManagerState, WorkflowManagerData] { - def this(workflowStore: ActorRef, - serviceRegistryActor: 
ActorRef, - workflowLogCopyRouter: ActorRef, - jobStoreActor: ActorRef, - callCacheReadActor: ActorRef, - jobTokenDispenserActor: ActorRef, - backendSingletonCollection: BackendSingletonCollection) = this( - ConfigFactory.load, workflowStore, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection) + private val config = params.config private val maxWorkflowsRunning = config.getConfig("system").as[Option[Int]]("max-concurrent-workflows").getOrElse(DefaultMaxWorkflowsToRun) private val maxWorkflowsToLaunch = config.getConfig("system").as[Option[Int]]("max-workflow-launch-count").getOrElse(DefaultMaxWorkflowsToLaunch) @@ -104,9 +107,8 @@ class WorkflowManagerActor(config: Config, private val logger = Logging(context.system, this) private val tag = self.path.name - private val donePromise = Promise[Unit]() - private var abortingWorkflowToReplyTo = Map.empty[WorkflowId, ActorRef] + private var shutdownHookThreadOption: Option[ShutdownHookThread] = None override def preStart(): Unit = { addShutdownHook() @@ -114,18 +116,38 @@ class WorkflowManagerActor(config: Config, self ! RetrieveNewWorkflows } + override def postStop() = { + // If the actor is stopping, especially during error tests, then there's nothing to wait for later at JVM shutdown. + tryRemoveShutdownHook() + super.postStop() + } + private def addShutdownHook() = { - // Only abort jobs on SIGINT if the config explicitly sets system.abortJobsOnTerminate = true. + // Only abort jobs on SIGINT if the config explicitly sets system.abort-jobs-on-terminate = true. val abortJobsOnTerminate = - config.getConfig("system").as[Option[Boolean]]("abort-jobs-on-terminate").getOrElse(false) + config.getConfig("system").as[Option[Boolean]]("abort-jobs-on-terminate").getOrElse(params.abortJobsOnTerminate) if (abortJobsOnTerminate) { - sys.addShutdownHook { - logger.info(s"$tag: Received shutdown signal. 
Aborting all running workflows...") + val shutdownHookThread = sys.addShutdownHook { + logger.info(s"$tag: Received shutdown signal.") self ! AbortAllWorkflowsCommand - Await.result(donePromise.future, Duration.Inf) + while (stateData != null && stateData.workflows.nonEmpty) { + log.info(s"Waiting for ${stateData.workflows.size} workflows to abort...") + Thread.sleep(1000) + } } + shutdownHookThreadOption = Option(shutdownHookThread) + } + } + + private def tryRemoveShutdownHook() = { + try { + shutdownHookThreadOption.foreach(_.remove()) + } catch { + case _: IllegalStateException => /* ignore, we're probably shutting down */ + case exception: Exception => log.error(exception, "Error while removing shutdown hook: {}", exception.getMessage) } + shutdownHookThreadOption = None } startWith(Running, WorkflowManagerData(workflows = Map.empty)) @@ -140,7 +162,7 @@ class WorkflowManagerActor(config: Config, Determine the number of available workflow slots and request the smaller of that number of maxWorkflowsToLaunch. */ val maxNewWorkflows = maxWorkflowsToLaunch min (maxWorkflowsRunning - stateData.workflows.size) - workflowStore ! WorkflowStoreActor.FetchRunnableWorkflows(maxNewWorkflows) + params.workflowStore ! 
WorkflowStoreActor.FetchRunnableWorkflows(maxNewWorkflows) stay() case Event(WorkflowStoreActor.NoNewWorkflowsToStart, stateData) => log.debug("WorkflowStore provided no new workflows to start") @@ -178,7 +200,7 @@ class WorkflowManagerActor(config: Config, Responses from services */ case Event(WorkflowFailedResponse(workflowId, inState, reasons), data) => - log.error(s"$tag Workflow $workflowId failed (during $inState): ${reasons.mkString("\n")}") + log.error(s"$tag Workflow $workflowId failed (during $inState): ${expandFailureReasons(reasons)}") stay() /* Watched transitions @@ -187,13 +209,13 @@ class WorkflowManagerActor(config: Config, log.info(s"$tag ${workflowActor.path.name} is in a terminal state: $toState") // This silently fails if idFromActor is None, but data.without call right below will as well data.idFromActor(workflowActor) foreach { workflowId => - jobStoreActor ! RegisterWorkflowCompleted(workflowId) + params.jobStoreActor ! RegisterWorkflowCompleted(workflowId) if (toState.workflowState == WorkflowAborted) { val replyTo = abortingWorkflowToReplyTo(workflowId) replyTo ! WorkflowStoreActor.WorkflowAborted(workflowId) abortingWorkflowToReplyTo -= workflowId } else { - workflowStore ! WorkflowStoreActor.RemoveWorkflow(workflowId) + params.workflowStore ! WorkflowStoreActor.RemoveWorkflow(workflowId) } } stay using data.without(workflowActor) @@ -242,8 +264,7 @@ class WorkflowManagerActor(config: Config, onTransition { case _ -> Done => - logger.info(s"$tag All workflows finished. 
Stopping self.") - donePromise.trySuccess(()) + logger.info(s"$tag All workflows finished") () case fromState -> toState => logger.debug(s"$tag transitioning from $fromState to $toState") @@ -264,8 +285,9 @@ class WorkflowManagerActor(config: Config, StartNewWorkflow } - val wfProps = WorkflowActor.props(workflowId, startMode, workflow.sources, config, serviceRegistryActor, - workflowLogCopyRouter, jobStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection) + val wfProps = WorkflowActor.props(workflowId, startMode, workflow.sources, config, params.serviceRegistryActor, + params.workflowLogCopyRouter, params.jobStoreActor, params.subWorkflowStoreActor, params.callCacheReadActor, params.jobTokenDispenserActor, + params.backendSingletonCollection, params.serverMode) val wfActor = context.actorOf(wfProps, name = s"WorkflowActor-$workflowId") wfActor ! SubscribeTransitionCallBack(self) @@ -277,4 +299,10 @@ class WorkflowManagerActor(config: Config, private def scheduleNextNewWorkflowPoll() = { context.system.scheduler.scheduleOnce(newWorkflowPollRate, self, RetrieveNewWorkflows)(context.dispatcher) } + + private def expandFailureReasons(reasons: Seq[Throwable]) = { + reasons map { reason => + reason.getMessage + "\n" + ExceptionUtils.getStackTrace(reason) + } mkString "\n" + } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala index aa25fdfb8..55ef429be 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowLogsActor.scala @@ -27,8 +27,7 @@ object CopyWorkflowLogsActor { // Which could be used for other copying work (outputs, call logs..) 
class CopyWorkflowLogsActor(serviceRegistryActor: ActorRef) extends Actor - with ActorLogging - with PathFactory { + with ActorLogging { def copyAndClean(src: Path, dest: Path) = { File(dest).parent.createDirectories() diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala index a5028e5af..1dbf8b270 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/CopyWorkflowOutputsActor.scala @@ -5,33 +5,33 @@ import java.nio.file.Path import akka.actor.Props import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationResponse, FinalizationSuccess} import cromwell.backend.{AllBackendInitializationData, BackendConfigurationDescriptor, BackendInitializationData, BackendLifecycleActorFactory} -import cromwell.core._ import cromwell.core.Dispatcher.IoDispatcher import cromwell.core.WorkflowOptions._ +import cromwell.core._ +import cromwell.core.path.{PathCopier, PathFactory} import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.backend.{BackendConfiguration, CromwellBackends} -import wdl4s.ReportableSymbol -import wdl4s.values.WdlSingleFile +import wdl4s.values.{WdlArray, WdlMap, WdlSingleFile, WdlValue} import scala.concurrent.{ExecutionContext, Future} object CopyWorkflowOutputsActor { - def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, outputStore: OutputStore, + def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData) = Props( - new CopyWorkflowOutputsActor(workflowId, workflowDescriptor, outputStore, initializationData) + new CopyWorkflowOutputsActor(workflowId, workflowDescriptor, workflowOutputs, initializationData) ).withDispatcher(IoDispatcher) } -class 
CopyWorkflowOutputsActor(workflowId: WorkflowId, val workflowDescriptor: EngineWorkflowDescriptor, outputStore: OutputStore, +class CopyWorkflowOutputsActor(workflowId: WorkflowId, val workflowDescriptor: EngineWorkflowDescriptor, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData) extends EngineWorkflowFinalizationActor with PathFactory { - private def copyWorkflowOutputs(workflowOutputsFilePath: String): Unit = { - val workflowOutputsPath = buildPath(workflowOutputsFilePath, workflowDescriptor.engineFilesystems) + override val pathBuilders = workflowDescriptor.pathBuilders - val reportableOutputs = workflowDescriptor.backendDescriptor.workflowNamespace.workflow.outputs + private def copyWorkflowOutputs(workflowOutputsFilePath: String): Unit = { + val workflowOutputsPath = buildPath(workflowOutputsFilePath) - val outputFilePaths = getOutputFilePaths(reportableOutputs) + val outputFilePaths = getOutputFilePaths outputFilePaths foreach { case (workflowRootPath, srcPath) => @@ -40,23 +40,23 @@ class CopyWorkflowOutputsActor(workflowId: WorkflowId, val workflowDescriptor: E } } - private def getOutputFilePaths(reportableOutputs: Seq[ReportableSymbol]): Seq[(Path, Path)] = { + private def findFiles(values: Seq[WdlValue]): Seq[WdlSingleFile] = { + values flatMap { + case file: WdlSingleFile => Seq(file) + case array: WdlArray => findFiles(array.value) + case map: WdlMap => findFiles(map.value.values.toSeq) + case _ => Seq.empty + } + } + + private def getOutputFilePaths: Seq[(Path, Path)] = { for { - reportableOutput <- reportableOutputs // NOTE: Without .toSeq, outputs in arrays only yield the last output - (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keys.toSeq).toSeq + backend <- workflowDescriptor.backendAssignments.values.toSeq config <- BackendConfiguration.backendConfigurationDescriptor(backend).toOption.toSeq rootPath <- getBackendRootPath(backend, config).toSeq - call <- calls - // NOTE: Without 
.toSeq, outputs in arrays only yield the last output - (outputCallKey, outputEntries) <- outputStore.store.toSeq - // Only get paths for the original scatter call, not the indexed entries - if outputCallKey.call == call && outputCallKey.index.isEmpty - outputEntry <- outputEntries - if reportableOutput.fullyQualifiedName == s"${call.fullyQualifiedName}.${outputEntry.name}" - wdlValue <- outputEntry.wdlValue.toSeq - collected = wdlValue collectAsSeq { case f: WdlSingleFile => f } - wdlFile <- collected + outputFiles = findFiles(workflowOutputs.values.map(_.wdlValue).toSeq) + wdlFile <- outputFiles wdlPath = rootPath.getFileSystem.getPath(wdlFile.value) } yield (rootPath, wdlPath) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala index 1a58b849a..74a08a8fe 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActor.scala @@ -1,8 +1,9 @@ package cromwell.engine.workflow.lifecycle -import java.nio.file.FileSystem +import java.nio.file.Files import akka.actor.{ActorRef, FSM, LoggingFSM, Props} +import better.files.File import cats.data.Validated._ import cats.instances.list._ import cats.syntax.cartesian._ @@ -14,12 +15,14 @@ import com.typesafe.scalalogging.LazyLogging import cromwell.backend.BackendWorkflowDescriptor import cromwell.core.Dispatcher.EngineDispatcher import cromwell.core.WorkflowOptions.{ReadFromCache, WorkflowOption, WriteToCache} +import cromwell.core._ import cromwell.core.callcaching._ import cromwell.core.logging.WorkflowLogging +import cromwell.core.path.PathBuilder import cromwell.engine._ import cromwell.engine.backend.CromwellBackends import 
cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{MaterializeWorkflowDescriptorActorData, MaterializeWorkflowDescriptorActorState} -import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata.MetadataService.{PutMetadataAction, _} import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} import cromwell.core.ErrorOr._ import net.ceedubs.ficus.Ficus._ @@ -41,15 +44,15 @@ object MaterializeWorkflowDescriptorActor { // exception if not initialized yet. def cromwellBackends = CromwellBackends.instance.get - def props(serviceRegistryActor: ActorRef, workflowId: WorkflowId, cromwellBackends: => CromwellBackends = cromwellBackends): Props = { - Props(new MaterializeWorkflowDescriptorActor(serviceRegistryActor, workflowId, cromwellBackends)).withDispatcher(EngineDispatcher) + def props(serviceRegistryActor: ActorRef, workflowId: WorkflowId, cromwellBackends: => CromwellBackends = cromwellBackends, importLocalFilesystem: Boolean): Props = { + Props(new MaterializeWorkflowDescriptorActor(serviceRegistryActor, workflowId, cromwellBackends, importLocalFilesystem)).withDispatcher(EngineDispatcher) } /* Commands */ sealed trait MaterializeWorkflowDescriptorActorMessage - case class MaterializeWorkflowDescriptorCommand(workflowSourceFiles: WorkflowSourceFiles, + case class MaterializeWorkflowDescriptorCommand(workflowSourceFiles: WorkflowSourceFilesCollection, conf: Config) extends MaterializeWorkflowDescriptorActorMessage case object MaterializeWorkflowDescriptorAbortCommand @@ -90,15 +93,17 @@ object MaterializeWorkflowDescriptorActor { } val enabled = conf.as[Option[Boolean]]("call-caching.enabled").getOrElse(false) + val invalidateBadCacheResults = conf.as[Option[Boolean]]("call-caching.invalidate-bad-cache-results").getOrElse(true) + val callCachingOptions = CallCachingOptions(invalidateBadCacheResults) if (enabled) { val readFromCache = readOptionalOption(ReadFromCache) val writeToCache = 
readOptionalOption(WriteToCache) (readFromCache |@| writeToCache) map { case (false, false) => CallCachingOff - case (true, false) => CallCachingActivity(ReadCache) - case (false, true) => CallCachingActivity(WriteCache) - case (true, true) => CallCachingActivity(ReadAndWriteCache) + case (true, false) => CallCachingActivity(ReadCache, callCachingOptions) + case (false, true) => CallCachingActivity(WriteCache, callCachingOptions) + case (true, true) => CallCachingActivity(ReadAndWriteCache, callCachingOptions) } } else { @@ -107,7 +112,10 @@ object MaterializeWorkflowDescriptorActor { } } -class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val workflowId: WorkflowId, cromwellBackends: => CromwellBackends) extends LoggingFSM[MaterializeWorkflowDescriptorActorState, MaterializeWorkflowDescriptorActorData] with LazyLogging with WorkflowLogging { +class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, + val workflowIdForLogging: WorkflowId, + cromwellBackends: => CromwellBackends, + importLocalFilesystem: Boolean) extends LoggingFSM[MaterializeWorkflowDescriptorActorState, MaterializeWorkflowDescriptorActorData] with LazyLogging with WorkflowLogging { import MaterializeWorkflowDescriptorActor._ @@ -119,7 +127,7 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor when(ReadyToMaterializeState) { case Event(MaterializeWorkflowDescriptorCommand(workflowSourceFiles, conf), _) => - buildWorkflowDescriptor(workflowId, workflowSourceFiles, conf) match { + buildWorkflowDescriptor(workflowIdForLogging, workflowSourceFiles, conf) match { case Valid(descriptor) => sender() ! 
MaterializeWorkflowDescriptorSuccessResponse(descriptor) goto(MaterializationSuccessfulState) @@ -157,57 +165,57 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor } private def buildWorkflowDescriptor(id: WorkflowId, - sourceFiles: WorkflowSourceFiles, + sourceFiles: WorkflowSourceFilesCollection, conf: Config): ErrorOr[EngineWorkflowDescriptor] = { - val namespaceValidation = validateNamespace(sourceFiles.wdlSource) + val namespaceValidation = validateNamespace(sourceFiles) val workflowOptionsValidation = validateWorkflowOptions(sourceFiles.workflowOptionsJson) (namespaceValidation |@| workflowOptionsValidation) map { (_, _) } flatMap { case (namespace, workflowOptions) => pushWfNameMetadataService(namespace.workflow.unqualifiedName) - val engineFileSystems = EngineFilesystems.filesystemsForWorkflow(workflowOptions)(iOExecutionContext) - buildWorkflowDescriptor(id, sourceFiles, namespace, workflowOptions, conf, engineFileSystems) + val pathBuilders = EngineFilesystems(context.system).pathBuildersForWorkflow(workflowOptions) + buildWorkflowDescriptor(id, sourceFiles, namespace, workflowOptions, conf, pathBuilders) } } private def pushWfNameMetadataService(name: String): Unit = { // Workflow name: - val nameEvent = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.Name), MetadataValue(name)) + val nameEvent = MetadataEvent(MetadataKey(workflowIdForLogging, None, WorkflowMetadataKeys.Name), MetadataValue(name)) serviceRegistryActor ! 
PutMetadataAction(nameEvent) } private def buildWorkflowDescriptor(id: WorkflowId, - sourceFiles: WorkflowSourceFiles, - namespace: NamespaceWithWorkflow, + sourceFiles: WorkflowSourceFilesCollection, + namespace: WdlNamespaceWithWorkflow, workflowOptions: WorkflowOptions, conf: Config, - engineFilesystems: List[FileSystem]): ErrorOr[EngineWorkflowDescriptor] = { + pathBuilders: List[PathBuilder]): ErrorOr[EngineWorkflowDescriptor] = { val defaultBackendName = conf.as[Option[String]]("backend.default") val rawInputsValidation = validateRawInputs(sourceFiles.inputsJson) val failureModeValidation = validateWorkflowFailureMode(workflowOptions, conf) - val backendAssignmentsValidation = validateBackendAssignments(namespace.workflow.calls, workflowOptions, defaultBackendName) + val backendAssignmentsValidation = validateBackendAssignments(namespace.taskCalls, workflowOptions, defaultBackendName) val callCachingModeValidation = validateCallCachingMode(workflowOptions, conf) (rawInputsValidation |@| failureModeValidation |@| backendAssignmentsValidation |@| callCachingModeValidation ) map { (_, _, _, _) } flatMap { case (rawInputs, failureMode, backendAssignments, callCachingMode) => - buildWorkflowDescriptor(id, namespace, rawInputs, backendAssignments, workflowOptions, failureMode, engineFilesystems, callCachingMode) + buildWorkflowDescriptor(id, namespace, rawInputs, backendAssignments, workflowOptions, failureMode, pathBuilders, callCachingMode) } } private def buildWorkflowDescriptor(id: WorkflowId, - namespace: NamespaceWithWorkflow, + namespace: WdlNamespaceWithWorkflow, rawInputs: Map[String, JsValue], - backendAssignments: Map[Call, String], + backendAssignments: Map[TaskCall, String], workflowOptions: WorkflowOptions, failureMode: WorkflowFailureMode, - engineFileSystems: List[FileSystem], + pathBuilders: List[PathBuilder], callCachingMode: CallCachingMode): ErrorOr[EngineWorkflowDescriptor] = { def checkTypes(inputs: Map[FullyQualifiedName, WdlValue]): 
ErrorOr[Map[FullyQualifiedName, WdlValue]] = { - val allDeclarations = namespace.workflow.scopedDeclarations ++ namespace.workflow.calls.flatMap(_.scopedDeclarations) + val allDeclarations = namespace.workflow.declarations ++ namespace.workflow.calls.flatMap(_.declarations) val list: List[ErrorOr[(FullyQualifiedName, WdlValue)]] = inputs.map({ case (k, v) => allDeclarations.find(_.fullyQualifiedName == k) match { case Some(decl) if decl.wdlType.coerceRawValue(v).isFailure => @@ -223,27 +231,27 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor for { coercedInputs <- validateCoercedInputs(rawInputs, namespace) _ = pushWfInputsToMetadataService(coercedInputs) - declarations <- validateDeclarations(namespace, workflowOptions, coercedInputs, engineFileSystems) - declarationsAndInputs <- checkTypes(declarations ++ coercedInputs) - backendDescriptor = BackendWorkflowDescriptor(id, namespace, declarationsAndInputs, workflowOptions) - } yield EngineWorkflowDescriptor(backendDescriptor, coercedInputs, backendAssignments, failureMode, engineFileSystems, callCachingMode) + evaluatedWorkflowsDeclarations <- validateDeclarations(namespace, workflowOptions, coercedInputs, pathBuilders) + declarationsAndInputs <- checkTypes(evaluatedWorkflowsDeclarations ++ coercedInputs) + backendDescriptor = BackendWorkflowDescriptor(id, namespace.workflow, declarationsAndInputs, workflowOptions) + } yield EngineWorkflowDescriptor(namespace, backendDescriptor, coercedInputs, backendAssignments, failureMode, pathBuilders, callCachingMode) } private def pushWfInputsToMetadataService(workflowInputs: WorkflowCoercedInputs): Unit = { // Inputs val inputEvents = workflowInputs match { case empty if empty.isEmpty => - List(MetadataEvent.empty(MetadataKey(workflowId, None,WorkflowMetadataKeys.Inputs))) + List(MetadataEvent.empty(MetadataKey(workflowIdForLogging, None,WorkflowMetadataKeys.Inputs))) case inputs => inputs flatMap { case (inputName, wdlValue) => - 
wdlValueToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Inputs}:$inputName"), wdlValue) + wdlValueToMetadataEvents(MetadataKey(workflowIdForLogging, None, s"${WorkflowMetadataKeys.Inputs}:$inputName"), wdlValue) } } serviceRegistryActor ! PutMetadataAction(inputEvents) } - private def validateBackendAssignments(calls: Seq[Call], workflowOptions: WorkflowOptions, defaultBackendName: Option[String]): ErrorOr[Map[Call, String]] = { + private def validateBackendAssignments(calls: Set[TaskCall], workflowOptions: WorkflowOptions, defaultBackendName: Option[String]): ErrorOr[Map[TaskCall, String]] = { val callToBackendMap = Try { calls map { call => val backendPriorities = Seq( @@ -272,7 +280,7 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor /** * Map a call to a backend name depending on the runtime attribute key */ - private def assignBackendUsingRuntimeAttrs(call: Call): Option[String] = { + private def assignBackendUsingRuntimeAttrs(call: TaskCall): Option[String] = { val runtimeAttributesMap = call.task.runtimeAttributes.attrs runtimeAttributesMap.get(RuntimeBackendKey) map { wdlExpr => evaluateBackendNameExpression(call.fullyQualifiedName, wdlExpr) } } @@ -287,19 +295,87 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor } } - private def validateDeclarations(namespace: NamespaceWithWorkflow, + private def validateDeclarations(namespace: WdlNamespaceWithWorkflow, options: WorkflowOptions, coercedInputs: WorkflowCoercedInputs, - engineFileSystems: List[FileSystem]): ErrorOr[WorkflowCoercedInputs] = { - namespace.staticWorkflowDeclarationsRecursive(coercedInputs, new WdlFunctions(engineFileSystems)) match { + pathBuilders: List[PathBuilder]): ErrorOr[WorkflowCoercedInputs] = { + namespace.staticDeclarationsRecursive(coercedInputs, new WdlFunctions(pathBuilders)) match { case Success(d) => d.validNel case Failure(e) => s"Workflow has invalid declarations: 
${e.getMessage}".invalidNel } } - private def validateNamespace(source: WdlSource): ErrorOr[NamespaceWithWorkflow] = { + private def validateImportsDirectory(zipContents: Array[Byte]): ErrorOr[File] = { + + def makeZipFile(contents: Array[Byte]): Try[File] = Try { + val dependenciesPath = Files.createTempFile("", ".zip") + Files.write(dependenciesPath, contents) + } + + def unZipFile(f: File) = Try { + val unzippedFile = f.unzip() + val unzippedFileContents = unzippedFile.toJava.listFiles().head + + if (unzippedFileContents.isDirectory) File(unzippedFileContents.getPath) + else unzippedFile + } + + val importsFile = for { + zipFile <- makeZipFile(zipContents) + unzipped <- unZipFile(zipFile) + _ <- Try(zipFile.delete(swallowIOExceptions = true)) + } yield unzipped + + importsFile match { + case Success(unzippedDirectory: File) => unzippedDirectory.validNel + case Failure(t) => t.getMessage.invalidNel + } + } + + private def validateNamespaceWithImports(w: WorkflowSourceFilesWithDependenciesZip): ErrorOr[WdlNamespaceWithWorkflow] = { + def getMetadatae(importsDir: File, prefix: String = ""): Seq[(String, File)] = { + importsDir.children.toSeq flatMap { + case f: File if f.isDirectory => getMetadatae(f, prefix + f.name + "/") + case f: File if f.name.endsWith(".wdl") => Seq((prefix + f.name, f)) + case _ => Seq.empty + } + } + + def writeMetadatae(importsDir: File) = { + import scala.collection.JavaConverters._ + + val wfImportEvents = getMetadatae(importsDir) map { case (name: String, f: File) => + val contents = Files.readAllLines(f.path).asScala.mkString(System.lineSeparator()) + MetadataEvent(MetadataKey(workflowIdForLogging, None, WorkflowMetadataKeys.SubmissionSection, WorkflowMetadataKeys.SubmissionSection_Imports, name), MetadataValue(contents)) + } + serviceRegistryActor ! 
PutMetadataAction(wfImportEvents) + } + + validateImportsDirectory(w.importsZip) flatMap { importsDir => + writeMetadatae(importsDir) + val importResolvers: Seq[ImportResolver] = if (importLocalFilesystem) { + List(WdlNamespace.directoryResolver(importsDir), WdlNamespace.fileResolver) + } else { + List(WdlNamespace.directoryResolver(importsDir)) + } + val results = WdlNamespaceWithWorkflow.load(w.wdlSource, importResolvers) + importsDir.delete(swallowIOExceptions = true) + results.validNel + } + } + + private def validateNamespace(source: WorkflowSourceFilesCollection): ErrorOr[WdlNamespaceWithWorkflow] = { try { - NamespaceWithWorkflow.load(source).validNel + source match { + case w: WorkflowSourceFilesWithDependenciesZip => validateNamespaceWithImports(w) + case w: WorkflowSourceFilesWithoutImports => + val importResolvers: Seq[ImportResolver] = if (importLocalFilesystem) { + List(WdlNamespace.fileResolver) + } else { + List.empty + } + WdlNamespaceWithWorkflow.load(w.wdlSource, importResolvers).validNel + } } catch { case e: Exception => s"Unable to load namespace from workflow: ${e.getMessage}".invalidNel } @@ -314,7 +390,7 @@ class MaterializeWorkflowDescriptorActor(serviceRegistryActor: ActorRef, val wor } private def validateCoercedInputs(rawInputs: Map[String, JsValue], - namespace: NamespaceWithWorkflow): ErrorOr[WorkflowCoercedInputs] = { + namespace: WdlNamespaceWithWorkflow): ErrorOr[WorkflowCoercedInputs] = { namespace.coerceRawInputs(rawInputs) match { case Success(r) => r.validNel case Failure(e: ExceptionWithErrors) => Invalid(e.errors) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala index 9614696e2..5c4b777a5 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowFinalizationActor.scala @@ 
-1,14 +1,15 @@ package cromwell.engine.workflow.lifecycle import akka.actor.{FSM, Props} -import cromwell.backend.AllBackendInitializationData import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationFailed, FinalizationSuccess, Finalize} +import cromwell.backend._ import cromwell.core.Dispatcher.EngineDispatcher -import cromwell.core.{ExecutionStore, OutputStore, WorkflowId} +import cromwell.core.{CallOutputs, WorkflowId} import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.backend.CromwellBackends import cromwell.engine.workflow.lifecycle.WorkflowFinalizationActor._ import cromwell.engine.workflow.lifecycle.WorkflowLifecycleActor._ +import wdl4s.TaskCall import scala.util.{Failure, Success, Try} @@ -37,14 +38,14 @@ object WorkflowFinalizationActor { case object WorkflowFinalizationSucceededResponse extends WorkflowLifecycleSuccessResponse final case class WorkflowFinalizationFailedResponse(reasons: Seq[Throwable]) extends WorkflowLifecycleFailureResponse - def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, executionStore: ExecutionStore, - outputStore: OutputStore, initializationData: AllBackendInitializationData): Props = { - Props(new WorkflowFinalizationActor(workflowId, workflowDescriptor, executionStore, outputStore, initializationData)).withDispatcher(EngineDispatcher) + def props(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData): Props = { + Props(new WorkflowFinalizationActor(workflowId, workflowDescriptor, jobExecutionMap, workflowOutputs, initializationData)).withDispatcher(EngineDispatcher) } } -case class WorkflowFinalizationActor(workflowId: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, - executionStore: ExecutionStore, outputStore: OutputStore, initializationData: AllBackendInitializationData) +case class 
WorkflowFinalizationActor(workflowIdForLogging: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, + jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData) extends WorkflowLifecycleActor[WorkflowFinalizationActorState] { val tag = self.path.name @@ -62,16 +63,16 @@ case class WorkflowFinalizationActor(workflowId: WorkflowId, workflowDescriptor: case Event(StartFinalizationCommand, _) => val backendFinalizationActors = Try { for { - (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keys.toSeq) + (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keySet) props <- CromwellBackends.backendLifecycleFactoryActorByName(backend).map( - _.workflowFinalizationActorProps(workflowDescriptor.backendDescriptor, calls, executionStore, outputStore, initializationData.get(backend)) + _.workflowFinalizationActorProps(workflowDescriptor.backendDescriptor, calls, filterJobExecutionsForBackend(calls), workflowOutputs, initializationData.get(backend)) ).get actor = context.actorOf(props, backend) } yield actor } val engineFinalizationActor = Try { - context.actorOf(CopyWorkflowOutputsActor.props(workflowId, workflowDescriptor, outputStore, initializationData), + context.actorOf(CopyWorkflowOutputsActor.props(workflowIdForLogging, workflowDescriptor, workflowOutputs, initializationData), "CopyWorkflowOutputsActor") } @@ -95,6 +96,15 @@ case class WorkflowFinalizationActor(workflowId: WorkflowId, workflowDescriptor: goto(WorkflowFinalizationFailedState) } } + + // Only send to each backend the jobs that it executed + private def filterJobExecutionsForBackend(calls: Set[TaskCall]): JobExecutionMap = { + jobExecutionMap map { + case (wd, executedKeys) => wd -> (executedKeys filter { jobKey => calls.contains(jobKey.call) }) + } filter { + case (wd, keys) => keys.nonEmpty + } + } when(FinalizationInProgressState) { case Event(FinalizationSuccess, stateData) => 
checkForDoneAndTransition(stateData.withSuccess(sender)) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala index 2fd5d75aa..14e8a31a4 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/WorkflowInitializationActor.scala @@ -50,7 +50,7 @@ object WorkflowInitializationActor { case class BackendActorAndBackend(actor: ActorRef, backend: String) } -case class WorkflowInitializationActor(workflowId: WorkflowId, +case class WorkflowInitializationActor(workflowIdForLogging: WorkflowId, workflowDescriptor: EngineWorkflowDescriptor, serviceRegistryActor: ActorRef) extends AbortableWorkflowLifecycleActor[WorkflowInitializationActorState] { @@ -78,7 +78,7 @@ case class WorkflowInitializationActor(workflowId: WorkflowId, case Event(StartInitializationCommand, _) => val backendInitializationActors = Try { for { - (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keys.toSeq) + (backend, calls) <- workflowDescriptor.backendAssignments.groupBy(_._2).mapValues(_.keySet) props <- CromwellBackends.backendLifecycleFactoryActorByName(backend).map(factory => factory.workflowInitializationActorProps(workflowDescriptor.backendDescriptor, calls, serviceRegistryActor) ).get diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala new file mode 100644 index 000000000..80748f6dd --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/CallMetadataHelper.scala @@ -0,0 +1,135 @@ +package cromwell.engine.workflow.lifecycle.execution + +import java.time.OffsetDateTime + +import akka.actor.ActorRef +import 
cromwell.backend.BackendJobDescriptorKey +import cromwell.core.ExecutionStatus._ +import cromwell.core._ +import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata._ +import wdl4s._ +import wdl4s.values.WdlValue + +import scala.util.Random + +trait CallMetadataHelper { + + def workflowIdForCallMetadata: WorkflowId + def serviceRegistryActor: ActorRef + + def pushNewCallMetadata(callKey: CallKey, backendName: Option[String]) = { + val startEvents = List( + Option(MetadataEvent(metadataKeyForCall(callKey, CallMetadataKeys.Start), MetadataValue(OffsetDateTime.now))), + backendName map { name => MetadataEvent(metadataKeyForCall(callKey, CallMetadataKeys.Backend), MetadataValue(name)) } + ).flatten + + serviceRegistryActor ! PutMetadataAction(startEvents) + } + + def pushQueuedCallMetadata(diffs: Seq[WorkflowExecutionDiff]) = { + val startingEvents = for { + diff <- diffs + (jobKey, executionState) <- diff.executionStoreChanges if jobKey.isInstanceOf[BackendJobDescriptorKey] && executionState == ExecutionStatus.QueuedInCromwell + } yield MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.ExecutionStatus), MetadataValue(ExecutionStatus.QueuedInCromwell)) + serviceRegistryActor ! PutMetadataAction(startingEvents) + } + + def pushStartingCallMetadata(callKey: CallKey) = { + val statusChange = MetadataEvent(metadataKeyForCall(callKey, CallMetadataKeys.ExecutionStatus), MetadataValue(ExecutionStatus.Starting)) + serviceRegistryActor ! 
PutMetadataAction(statusChange) + } + + def pushRunningCallMetadata(key: CallKey, evaluatedInputs: EvaluatedTaskInputs) = { + val inputEvents = evaluatedInputs match { + case empty if empty.isEmpty => + List(MetadataEvent.empty(metadataKeyForCall(key, s"${CallMetadataKeys.Inputs}"))) + case inputs => + inputs flatMap { + case (inputName, inputValue) => + wdlValueToMetadataEvents(metadataKeyForCall(key, s"${CallMetadataKeys.Inputs}:${inputName.unqualifiedName}"), inputValue) + } + } + + val runningEvent = List(MetadataEvent(metadataKeyForCall(key, CallMetadataKeys.ExecutionStatus), MetadataValue(ExecutionStatus.Running))) + + serviceRegistryActor ! PutMetadataAction(runningEvent ++ inputEvents) + } + + def pushWorkflowOutputMetadata(outputs: Map[LocallyQualifiedName, WdlValue]) = { + val events = outputs match { + case empty if empty.isEmpty => List(MetadataEvent.empty(MetadataKey(workflowIdForCallMetadata, None, WorkflowMetadataKeys.Outputs))) + case _ => outputs flatMap { + case (outputName, outputValue) => + wdlValueToMetadataEvents(MetadataKey(workflowIdForCallMetadata, None, s"${WorkflowMetadataKeys.Outputs}:$outputName"), outputValue) + } + } + + serviceRegistryActor ! PutMetadataAction(events) + } + + def pushSuccessfulCallMetadata(jobKey: JobKey, returnCode: Option[Int], outputs: CallOutputs) = { + val completionEvents = completedCallMetadataEvents(jobKey, ExecutionStatus.Done, returnCode) + + val outputEvents = outputs match { + case empty if empty.isEmpty => + List(MetadataEvent.empty(metadataKeyForCall(jobKey, s"${CallMetadataKeys.Outputs}"))) + case _ => + outputs flatMap { case (lqn, outputValue) => wdlValueToMetadataEvents(metadataKeyForCall(jobKey, s"${CallMetadataKeys.Outputs}:$lqn"), outputValue.wdlValue) } + } + + serviceRegistryActor ! 
PutMetadataAction(completionEvents ++ outputEvents) + } + + def pushFailedCallMetadata(jobKey: JobKey, returnCode: Option[Int], failure: Throwable, retryableFailure: Boolean) = { + val failedState = if (retryableFailure) ExecutionStatus.Preempted else ExecutionStatus.Failed + val completionEvents = completedCallMetadataEvents(jobKey, failedState, returnCode) + val retryableFailureEvent = MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.RetryableFailure), MetadataValue(retryableFailure)) + val failureEvents = throwableToMetadataEvents(metadataKeyForCall(jobKey, s"${CallMetadataKeys.Failures}[$randomNumberString]"), failure).+:(retryableFailureEvent) + + serviceRegistryActor ! PutMetadataAction(completionEvents ++ failureEvents) + } + + def pushExecutionEventsToMetadataService(jobKey: JobKey, eventList: Seq[ExecutionEvent]) = { + def metadataEvent(k: String, value: Any) = { + val metadataValue = MetadataValue(value) + val metadataKey = metadataKeyForCall(jobKey, k) + MetadataEvent(metadataKey, metadataValue) + } + + eventList.headOption foreach { firstEvent => + // The final event is only used as the book-end for the final pairing so the name is never actually used... + val offset = firstEvent.offsetDateTime.getOffset + val now = OffsetDateTime.now.withOffsetSameInstant(offset) + val lastEvent = ExecutionEvent("!!Bring Back the Monarchy!!", now) + val tailedEventList = eventList :+ lastEvent + val events = tailedEventList.sliding(2).zipWithIndex flatMap { + case (Seq(eventCurrent, eventNext), index) => + val eventKey = s"executionEvents[$index]" + List( + metadataEvent(s"$eventKey:description", eventCurrent.name), + metadataEvent(s"$eventKey:startTime", eventCurrent.offsetDateTime), + metadataEvent(s"$eventKey:endTime", eventNext.offsetDateTime) + ) + } + + serviceRegistryActor ! 
PutMetadataAction(events.toIterable) + } + } + + private def completedCallMetadataEvents(jobKey: JobKey, executionStatus: ExecutionStatus, returnCode: Option[Int]) = { + val returnCodeEvent = returnCode map { rc => + List(MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.ReturnCode), MetadataValue(rc))) + } + + List( + MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.ExecutionStatus), MetadataValue(executionStatus)), + MetadataEvent(metadataKeyForCall(jobKey, CallMetadataKeys.End), MetadataValue(OffsetDateTime.now)) + ) ++ returnCodeEvent.getOrElse(List.empty) + } + + private def metadataKeyForCall(jobKey: JobKey, myKey: String) = MetadataKey(workflowIdForCallMetadata, Option(MetadataJobKey(jobKey.scope.fullyQualifiedName, jobKey.index, jobKey.attempt)), myKey) + + private def randomNumberString: String = Random.nextInt.toString.stripPrefix("-") + +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala index 25c1852dc..afa48474d 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/EngineJobExecutionActor.scala @@ -1,7 +1,5 @@ package cromwell.engine.workflow.lifecycle.execution -import java.time.OffsetDateTime - import akka.actor.{ActorRef, ActorRefFactory, LoggingFSM, Props} import akka.routing.RoundRobinPool import cats.data.NonEmptyList @@ -14,8 +12,8 @@ import cromwell.core._ import cromwell.core.callcaching._ import cromwell.core.logging.WorkflowLogging import cromwell.core.simpleton.WdlValueSimpleton +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor.{BackendJobPreparationSucceeded, CallPreparationFailed} import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ -import 
cromwell.engine.workflow.lifecycle.execution.JobPreparationActor.{BackendJobPreparationFailed, BackendJobPreparationSucceeded} import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CacheMiss, CallCacheHashes, HashError} import cromwell.engine.workflow.lifecycle.execution.callcaching.FetchCachedResultsActor.{CachedOutputLookupFailed, CachedOutputLookupSucceeded} import cromwell.engine.workflow.lifecycle.execution.callcaching._ @@ -23,8 +21,6 @@ import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor.{JobExecu import cromwell.jobstore.JobStoreActor._ import cromwell.jobstore.{Pending => _, _} import cromwell.services.SingletonServicesStore -import cromwell.services.metadata.MetadataService.PutMetadataAction -import cromwell.services.metadata.{MetadataEvent, MetadataJobKey, MetadataKey, MetadataValue} import wdl4s.TaskOutput import scala.concurrent.ExecutionContext @@ -36,17 +32,18 @@ class EngineJobExecutionActor(replyTo: ActorRef, factory: BackendLifecycleActorFactory, initializationData: Option[BackendInitializationData], restarting: Boolean, - serviceRegistryActor: ActorRef, + val serviceRegistryActor: ActorRef, jobStoreActor: ActorRef, callCacheReadActor: ActorRef, jobTokenDispenserActor: ActorRef, backendSingletonActor: Option[ActorRef], backendName: String, - callCachingMode: CallCachingMode) extends LoggingFSM[EngineJobExecutionActorState, EJEAData] with WorkflowLogging { + callCachingMode: CallCachingMode) extends LoggingFSM[EngineJobExecutionActorState, EJEAData] with WorkflowLogging with CallMetadataHelper { - override val workflowId = executionData.workflowDescriptor.id + override val workflowIdForLogging = executionData.workflowDescriptor.id + override val workflowIdForCallMetadata = executionData.workflowDescriptor.id - val jobTag = s"${workflowId.shortString}:${jobDescriptorKey.call.fullyQualifiedName}:${jobDescriptorKey.index.fromIndex}:${jobDescriptorKey.attempt}" + val jobTag = 
s"${workflowIdForLogging.shortString}:${jobDescriptorKey.call.fullyQualifiedName}:${jobDescriptorKey.index.fromIndex}:${jobDescriptorKey.attempt}" val tag = s"EJEA_$jobTag" // There's no need to check for a cache hit again if we got preempted, or if there's no result copying actor defined @@ -78,8 +75,9 @@ class EngineJobExecutionActor(replyTo: ActorRef, when(RequestingExecutionToken) { case Event(JobExecutionTokenDispensed(jobExecutionToken), NoData) => executionToken = Option(jobExecutionToken) + replyTo ! JobStarting(jobDescriptorKey) if (restarting) { - val jobStoreKey = jobDescriptorKey.toJobStoreKey(workflowId) + val jobStoreKey = jobDescriptorKey.toJobStoreKey(workflowIdForLogging) jobStoreActor ! QueryJobCompletion(jobStoreKey, jobDescriptorKey.call.task.outputs) goto(CheckingJobStore) } else { @@ -96,9 +94,9 @@ class EngineJobExecutionActor(replyTo: ActorRef, prepareJob() case Event(JobComplete(jobResult), NoData) => val response = jobResult match { - case JobResultSuccess(returnCode, jobOutputs) => SucceededResponse(jobDescriptorKey, returnCode, jobOutputs, None, Seq.empty) - case JobResultFailure(returnCode, reason, false) => FailedNonRetryableResponse(jobDescriptorKey, reason, returnCode) - case JobResultFailure(returnCode, reason, true) => FailedRetryableResponse(jobDescriptorKey, reason, returnCode) + case JobResultSuccess(returnCode, jobOutputs) => JobSucceededResponse(jobDescriptorKey, returnCode, jobOutputs, None, Seq.empty) + case JobResultFailure(returnCode, reason, false) => JobFailedNonRetryableResponse(jobDescriptorKey, reason, returnCode) + case JobResultFailure(returnCode, reason, true) => JobFailedRetryableResponse(jobDescriptorKey, reason, returnCode) } respondAndStop(response) case Event(f: JobStoreReadFailure, NoData) => @@ -120,8 +118,8 @@ class EngineJobExecutionActor(replyTo: ActorRef, runJob(updatedData) case CallCachingOff => runJob(updatedData) } - case Event(response: BackendJobPreparationFailed, NoData) => - 
forwardAndStop(response) + case Event(CallPreparationFailed(jobKey: BackendJobDescriptorKey, throwable), NoData) => + respondAndStop(JobFailedNonRetryableResponse(jobKey, throwable, None)) } private val callCachingReadResultMetadataKey = "Call caching read result" @@ -156,19 +154,16 @@ class EngineJobExecutionActor(replyTo: ActorRef, when(BackendIsCopyingCachedOutputs) { // Backend copying response: - case Event(response: SucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)), _)) => + case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)), _)) => saveCacheResults(hashes, data.withSuccessResponse(response)) - case Event(response: SucceededResponse, data @ ResponsePendingData(_, _, None, _)) if effectiveCallCachingMode.writeToCache => + case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, None, _)) if effectiveCallCachingMode.writeToCache => // Wait for the CallCacheHashes stay using data.withSuccessResponse(response) - case Event(response: SucceededResponse, data: ResponsePendingData) => // bad hashes or cache write off + case Event(response: JobSucceededResponse, data: ResponsePendingData) => // bad hashes or cache write off saveJobCompletionToJobStore(data.withSuccessResponse(response)) case Event(response: BackendJobExecutionResponse, data @ ResponsePendingData(_, _, _, Some(cacheHit))) => response match { - case f: BackendJobFailedResponse => - invalidateCacheHit(cacheHit.cacheResultIds.head) - log.error(f.throwable, "Failed copying cache results for job {}, invalidating cache entry.", jobDescriptorKey) - goto(InvalidatingCacheEntry) + case f: BackendJobFailedResponse => invalidateCacheHitAndTransition(cacheHit.cacheResultIds.head, data, f.throwable) case _ => runJob(data) } @@ -216,10 +211,10 @@ class EngineJobExecutionActor(replyTo: ActorRef, disableCallCaching(t) stay using data.copy(hashes = Option(Failure(t))) - case Event(response: SucceededResponse, data @ 
ResponsePendingData(_, _, Some(Success(hashes)), _)) if effectiveCallCachingMode.writeToCache => + case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, Some(Success(hashes)), _)) if effectiveCallCachingMode.writeToCache => eventList ++= response.executionEvents saveCacheResults(hashes, data.withSuccessResponse(response)) - case Event(response: SucceededResponse, data @ ResponsePendingData(_, _, None, _)) if effectiveCallCachingMode.writeToCache => + case Event(response: JobSucceededResponse, data @ ResponsePendingData(_, _, None, _)) if effectiveCallCachingMode.writeToCache => log.debug(s"Got job result for {}, awaiting hashes", jobTag) stay using data.withSuccessResponse(response) case Event(response: BackendJobExecutionResponse, data: ResponsePendingData) => @@ -240,7 +235,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, case Event(JobStoreWriteSuccess(_), data: ResponseData) => forwardAndStop(data.response) case Event(JobStoreWriteFailure(t), data: ResponseData) => - respondAndStop(FailedNonRetryableResponse(jobDescriptorKey, new Exception(s"JobStore write failure: ${t.getMessage}", t), None)) + respondAndStop(JobFailedNonRetryableResponse(jobDescriptorKey, new Exception(s"JobStore write failure: ${t.getMessage}", t), None)) } onTransition { @@ -266,7 +261,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, private def forwardAndStop(response: Any): State = { replyTo forward response returnExecutionToken() - tellEventMetadata() + pushExecutionEventsToMetadataService(jobDescriptorKey, eventList) context stop self stay() } @@ -274,7 +269,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, private def respondAndStop(response: Any): State = { replyTo ! 
response returnExecutionToken() - tellEventMetadata() + pushExecutionEventsToMetadataService(jobDescriptorKey, eventList) context stop self stay() } @@ -286,7 +281,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, } private def disableCacheWrite(reason: Throwable) = { - log.error("{}: Disabling cache writing for this job.", jobTag) + log.error(reason, "{}: Disabling cache writing for this job.", jobTag) if (effectiveCallCachingMode.writeToCache) { effectiveCallCachingMode = effectiveCallCachingMode.withoutWrite writeCallCachingModeToMetadata() @@ -302,7 +297,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, val jobPreparationActorName = s"BackendPreparationActor_for_$jobTag" val jobPrepProps = JobPreparationActor.props(executionData, jobDescriptorKey, factory, initializationData, serviceRegistryActor, backendSingletonActor) val jobPreparationActor = createJobPreparationActor(jobPrepProps, jobPreparationActorName) - jobPreparationActor ! JobPreparationActor.Start + jobPreparationActor ! CallPreparationActor.Start goto(PreparingJob) } @@ -335,13 +330,13 @@ class EngineJobExecutionActor(replyTo: ActorRef, val backendCacheHitCopyingActorProps = propsMaker(data.jobDescriptor, initializationData, serviceRegistryActor) val cacheHitCopyActor = context.actorOf(backendCacheHitCopyingActorProps, buildCacheHitCopyingActorName(data.jobDescriptor)) cacheHitCopyActor ! CopyOutputsCommand(wdlValueSimpletons, jobDetritusFiles, returnCode) - replyTo ! JobRunning(data.jobDescriptor, None) + replyTo ! JobRunning(data.jobDescriptor.key, data.jobDescriptor.inputDeclarations, None) goto(BackendIsCopyingCachedOutputs) case None => // This should be impossible with the FSM, but luckily, we CAN recover if some foolish future programmer makes this happen: val errorMessage = "Call caching copying should never have even been attempted with no copy actor props! (Programmer error!)" log.error(errorMessage) - self ! 
FailedNonRetryableResponse(data.jobDescriptor.key, new RuntimeException(errorMessage), None) + self ! JobFailedNonRetryableResponse(data.jobDescriptor.key, new RuntimeException(errorMessage), None) goto(BackendIsCopyingCachedOutputs) } } @@ -350,7 +345,7 @@ class EngineJobExecutionActor(replyTo: ActorRef, val backendJobExecutionActor = context.actorOf(data.bjeaProps, buildJobExecutionActorName(data.jobDescriptor)) val message = if (restarting) RecoverJobCommand else ExecuteJobCommand backendJobExecutionActor ! message - replyTo ! JobRunning(data.jobDescriptor, Option(backendJobExecutionActor)) + replyTo ! JobRunning(data.jobDescriptor.key, data.jobDescriptor.inputDeclarations, Option(backendJobExecutionActor)) goto(RunningJob) using data } @@ -371,19 +366,33 @@ class EngineJobExecutionActor(replyTo: ActorRef, } private def buildJobExecutionActorName(jobDescriptor: BackendJobDescriptor) = { - s"$workflowId-BackendJobExecutionActor-$jobTag" + s"$workflowIdForLogging-BackendJobExecutionActor-$jobTag" } private def buildCacheHitCopyingActorName(jobDescriptor: BackendJobDescriptor) = { - s"$workflowId-BackendCacheHitCopyingActor-$jobTag" + s"$workflowIdForLogging-BackendCacheHitCopyingActor-$jobTag" } - protected def createSaveCacheResultsActor(hashes: CallCacheHashes, success: SucceededResponse): Unit = { + protected def createSaveCacheResultsActor(hashes: CallCacheHashes, success: JobSucceededResponse): Unit = { val callCache = new CallCache(SingletonServicesStore.databaseInterface) - context.actorOf(CallCacheWriteActor.props(callCache, workflowId, hashes, success), s"CallCacheWriteActor-$tag") + context.actorOf(CallCacheWriteActor.props(callCache, workflowIdForLogging, hashes, success), s"CallCacheWriteActor-$tag") () } + private def invalidateCacheHitAndTransition(cacheId: CallCachingEntryId, data: ResponsePendingData, reason: Throwable) = { + val invalidationRequired = effectiveCallCachingMode match { + case CallCachingOff => throw new RuntimeException("Should not 
be calling invalidateCacheHit if call caching is off!") // Very unexpected. Fail out of this bad-state EJEA. + case activity: CallCachingActivity => activity.options.invalidateBadCacheResults + } + if (invalidationRequired) { + log.error(reason, "Failed copying cache results for job {}, invalidating cache entry.", jobDescriptorKey) + invalidateCacheHit(cacheId) + goto(InvalidatingCacheEntry) + } else { + handleCacheInvalidatedResponse(CallCacheInvalidationUnnecessary, data) + } + } + protected def invalidateCacheHit(cacheId: CallCachingEntryId): Unit = { val callCache = new CallCache(SingletonServicesStore.databaseInterface) context.actorOf(CallCacheInvalidateActor.props(callCache, cacheId), s"CallCacheInvalidateActor${cacheId.id}-$tag") @@ -398,71 +407,37 @@ class EngineJobExecutionActor(replyTo: ActorRef, private def saveJobCompletionToJobStore(updatedData: ResponseData) = { updatedData.response match { - case SucceededResponse(jobKey: BackendJobDescriptorKey, returnCode: Option[Int], jobOutputs: JobOutputs, _, _) => saveSuccessfulJobResults(jobKey, returnCode, jobOutputs) + case JobSucceededResponse(jobKey: BackendJobDescriptorKey, returnCode: Option[Int], jobOutputs: CallOutputs, _, _) => saveSuccessfulJobResults(jobKey, returnCode, jobOutputs) case AbortedResponse(jobKey: BackendJobDescriptorKey) => log.debug("{}: Won't save aborted job response to JobStore", jobTag) forwardAndStop(updatedData.response) - case FailedNonRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) => saveUnsuccessfulJobResults(jobKey, returnCode, throwable, retryable = false) - case FailedRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) => saveUnsuccessfulJobResults(jobKey, returnCode, throwable, retryable = true) + case JobFailedNonRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) => saveUnsuccessfulJobResults(jobKey, returnCode, throwable, 
retryable = false) + case JobFailedRetryableResponse(jobKey: BackendJobDescriptorKey, throwable: Throwable, returnCode: Option[Int]) => saveUnsuccessfulJobResults(jobKey, returnCode, throwable, retryable = true) } goto(UpdatingJobStore) using updatedData } - private def saveSuccessfulJobResults(jobKey: JobKey, returnCode: Option[Int], outputs: JobOutputs) = { - val jobStoreKey = jobKey.toJobStoreKey(workflowId) + private def saveSuccessfulJobResults(jobKey: JobKey, returnCode: Option[Int], outputs: CallOutputs) = { + val jobStoreKey = jobKey.toJobStoreKey(workflowIdForLogging) val jobStoreResult = JobResultSuccess(returnCode, outputs) jobStoreActor ! RegisterJobCompleted(jobStoreKey, jobStoreResult) } private def saveUnsuccessfulJobResults(jobKey: JobKey, returnCode: Option[Int], reason: Throwable, retryable: Boolean) = { - val jobStoreKey = jobKey.toJobStoreKey(workflowId) + val jobStoreKey = jobKey.toJobStoreKey(workflowIdForLogging) val jobStoreResult = JobResultFailure(returnCode, reason, retryable) jobStoreActor ! RegisterJobCompleted(jobStoreKey, jobStoreResult) } private def writeToMetadata(keyValues: Map[String, String]) = { import cromwell.services.metadata.MetadataService.implicits.MetadataAutoPutter - serviceRegistryActor.putMetadata(workflowId, Option(jobDescriptorKey), keyValues) + serviceRegistryActor.putMetadata(workflowIdForLogging, Option(jobDescriptorKey), keyValues) } private def addHashesAndStay(data: ResponsePendingData, hashes: CallCacheHashes): State = { val updatedData = data.copy(hashes = Option(Success(hashes))) stay using updatedData } - - /** - * Fire and forget events to the metadata service - */ - private def tellEventMetadata(): Unit = { - eventList.headOption foreach { firstEvent => - // The final event is only used as the book-end for the final pairing so the name is never actually used... 
- val offset = firstEvent.offsetDateTime.getOffset - val now = OffsetDateTime.now.withOffsetSameInstant(offset) - val lastEvent = ExecutionEvent("!!Bring Back the Monarchy!!", now) - val tailedEventList = eventList :+ lastEvent - val events = tailedEventList.sliding(2).zipWithIndex flatMap { - case (Seq(eventCurrent, eventNext), index) => - val eventKey = s"executionEvents[$index]" - List( - metadataEvent(s"$eventKey:description", eventCurrent.name), - metadataEvent(s"$eventKey:startTime", eventCurrent.offsetDateTime), - metadataEvent(s"$eventKey:endTime", eventNext.offsetDateTime) - ) - } - - serviceRegistryActor ! PutMetadataAction(events.toIterable) - } - } - - private def metadataEvent(key: String, value: Any) = { - val metadataValue = MetadataValue(value) - MetadataEvent(metadataKey(key), metadataValue) - } - - private lazy val metadataJobKey = { - MetadataJobKey(jobDescriptorKey.call.fullyQualifiedName, jobDescriptorKey.index, jobDescriptorKey.attempt) - } - private def metadataKey(key: String) = MetadataKey(workflowId, Option(metadataJobKey), key) } object EngineJobExecutionActor { @@ -484,8 +459,6 @@ object EngineJobExecutionActor { sealed trait EngineJobExecutionActorCommand case object Execute extends EngineJobExecutionActorCommand - final case class JobRunning(jobDescriptor: BackendJobDescriptor, backendJobExecutionActor: Option[ActorRef]) - def props(replyTo: ActorRef, jobDescriptorKey: BackendJobDescriptorKey, executionData: WorkflowExecutionActorData, @@ -526,10 +499,10 @@ object EngineJobExecutionActor { hashes: Option[Try[CallCacheHashes]] = None, cacheHit: Option[CacheHit] = None) extends EJEAData { - def withSuccessResponse(success: SucceededResponse) = SucceededResponseData(success, hashes) + def withSuccessResponse(success: JobSucceededResponse) = SucceededResponseData(success, hashes) def withResponse(response: BackendJobExecutionResponse) = response match { - case success: SucceededResponse => SucceededResponseData(success, hashes) + case 
success: JobSucceededResponse => SucceededResponseData(success, hashes) case failure => NotSucceededResponseData(failure, hashes) } @@ -546,7 +519,7 @@ object EngineJobExecutionActor { def hashes: Option[Try[CallCacheHashes]] } - private[execution] case class SucceededResponseData(successResponse: SucceededResponse, + private[execution] case class SucceededResponseData(successResponse: JobSucceededResponse, hashes: Option[Try[CallCacheHashes]] = None) extends ResponseData { override def response = successResponse } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStore.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStore.scala new file mode 100644 index 000000000..3b9d1af1f --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/ExecutionStore.scala @@ -0,0 +1,109 @@ +package cromwell.engine.workflow.lifecycle.execution + +import cromwell.backend.BackendJobDescriptorKey +import cromwell.core.ExecutionStatus._ +import cromwell.core.{CallKey, ExecutionStatus, JobKey} +import cromwell.engine.workflow.lifecycle.execution.ExecutionStore.ExecutionStoreEntry +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{apply => _, _} +import wdl4s._ + + +object ExecutionStore { + def empty = ExecutionStore(Map.empty[JobKey, ExecutionStatus]) + type ExecutionStoreEntry = (JobKey, ExecutionStatus) + def apply(workflow: Workflow, workflowCoercedInputs: WorkflowCoercedInputs) = { + // Only add direct children to the store, the rest is dynamically created when necessary + val keys = workflow.children map { + case call: TaskCall => Option(BackendJobDescriptorKey(call, None, 1)) + case call: WorkflowCall => Option(SubWorkflowKey(call, None, 1)) + case scatter: Scatter => Option(ScatterKey(scatter)) + case declaration: Declaration => Option(DeclarationKey(declaration, None, workflowCoercedInputs)) + case _ => None // Ifs will need to be added here when 
supported + } + + new ExecutionStore(keys.flatten.map(_ -> NotStarted).toMap) + } +} + +case class ExecutionStore(store: Map[JobKey, ExecutionStatus]) { + def add(values: Map[JobKey, ExecutionStatus]) = this.copy(store = store ++ values) + + // Convert the store to a `List` before `collect`ing to sidestep expensive and pointless hashing of `Scope`s when + // assembling the result. + def runnableScopes = store.toList collect { case entry if isRunnable(entry) => entry._1 } + + def backendJobKeys = store.keys.toList collect { case k: BackendJobDescriptorKey => k } + + private def isRunnable(entry: ExecutionStoreEntry) = { + entry match { + case (key, ExecutionStatus.NotStarted) => arePrerequisitesDone(key) + case _ => false + } + } + + def findShardEntries(key: CollectorKey): List[ExecutionStoreEntry] = store.toList filter { + case (k: CallKey, v) => k.scope == key.scope && k.isShard + case (k: DeclarationKey, v) => k.scope == key.scope && k.isShard + case _ => false + } + + private def arePrerequisitesDone(key: JobKey): Boolean = { + val upstream = key.scope.upstream collect { + case n: Call => upstreamEntry(key, n) + case n: Scatter => upstreamEntry(key, n) + case n: Declaration => upstreamEntry(key, n) + } + + val downstream: List[(JobKey, ExecutionStatus)] = key match { + case collector: CollectorKey => findShardEntries(collector) + case _ => Nil + } + + /* + * We need to use an "exists" in this case because the execution store can contain a job attempt with the same + * fqn and index but a preempted status. We wouldn't want that preempted attempt to count against the completion + * of the scatter block. 
+ */ + def isDone(e: JobKey): Boolean = store exists { + case (k, s) => k.scope.fullyQualifiedName == e.scope.fullyQualifiedName && k.index == e.index && s == ExecutionStatus.Done + } + + val dependencies = upstream.flatten ++ downstream + val dependenciesResolved = dependencies forall { case (k, _) => isDone(k) } + + /* + * We need to make sure that all prerequisiteScopes have been resolved to some entry before going forward. + * If a scope cannot be resolved it may be because it is in a scatter that has not been populated yet, + * therefore there is no entry in the executionStore for this scope. + * If that's the case this prerequisiteScope has not been run yet, hence the (upstream forall {_.nonEmpty}) + */ + (upstream forall { _.nonEmpty }) && dependenciesResolved + } + + private def upstreamEntry(entry: JobKey, prerequisiteScope: Scope): Option[ExecutionStoreEntry] = { + prerequisiteScope.closestCommonAncestor(entry.scope) match { + /* + * If this entry refers to a Scope which has a common ancestor with prerequisiteScope + * and that common ancestor is a Scatter block, then find the shard with the same index + * as 'entry'. In other words, if you're in the same scatter block as your pre-requisite + * scope, then depend on the shard (with same index). + * + * NOTE: this algorithm was designed for ONE-LEVEL of scattering and probably does not + * work as-is for nested scatter blocks + */ + case Some(ancestor: Scatter) => + store find { + case (k, _) => k.scope == prerequisiteScope && k.index == entry.index + } + + /* + * Otherwise, simply refer to the collector entry. This means that 'entry' depends + * on every shard of the pre-requisite scope to finish. 
+ */ + case _ => + store find { + case (k, _) => k.scope == prerequisiteScope && k.index.isEmpty + } + } + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/JobPreparationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/JobPreparationActor.scala index 12c719994..467439622 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/JobPreparationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/JobPreparationActor.scala @@ -3,88 +3,61 @@ package cromwell.engine.workflow.lifecycle.execution import akka.actor.{Actor, ActorRef, Props} import cromwell.backend._ import cromwell.core.logging.WorkflowLogging -import cromwell.core.{ExecutionStore, JobKey, OutputStore} +import cromwell.core.{CallKey, JobKey, WorkflowId} import cromwell.engine.EngineWorkflowDescriptor -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor._ +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor._ +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.SubWorkflowKey import wdl4s._ import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.util.TryUtil import wdl4s.values.WdlValue import scala.util.{Failure, Success, Try} -final case class JobPreparationActor(executionData: WorkflowExecutionActorData, - jobKey: BackendJobDescriptorKey, - factory: BackendLifecycleActorFactory, - initializationData: Option[BackendInitializationData], - serviceRegistryActor: ActorRef, - backendSingletonActor: Option[ActorRef]) - extends Actor with WdlLookup with WorkflowLogging { - - override lazy val workflowDescriptor: EngineWorkflowDescriptor = executionData.workflowDescriptor - override lazy val workflowId = workflowDescriptor.id - override lazy val executionStore: ExecutionStore = executionData.executionStore - override lazy val outputStore: OutputStore = executionData.outputStore - override lazy val 
expressionLanguageFunctions = factory.expressionLanguageFunctions( - workflowDescriptor.backendDescriptor, jobKey, initializationData) - +abstract class CallPreparationActor(val workflowDescriptor: EngineWorkflowDescriptor, + val outputStore: OutputStore, + callKey: CallKey) extends Actor with WorkflowLogging { + lazy val workflowIdForLogging = workflowDescriptor.id + def expressionLanguageFunctions: WdlStandardLibraryFunctions + def prepareExecutionActor(inputEvaluation: Map[Declaration, WdlValue]): CallPreparationActorResponse + override def receive = { case Start => - val response = resolveAndEvaluateInputs(jobKey, expressionLanguageFunctions) map { prepareJobExecutionActor } - context.parent ! (response recover { case f => BackendJobPreparationFailed(jobKey, f) }).get + val response = resolveAndEvaluateInputs() map { prepareExecutionActor } + context.parent ! (response recover { case f => CallPreparationFailed(callKey, f) }).get context stop self case unhandled => workflowLogger.warn(self.path.name + " received an unhandled message: " + unhandled) } - // Split inputs map (= evaluated workflow declarations + coerced json inputs) into [init\.*].last - private lazy val splitInputs = workflowDescriptor.backendDescriptor.inputs map { case (fqn, v) => splitFqn(fqn) -> v } - - def resolveAndEvaluateInputs(jobKey: BackendJobDescriptorKey, - wdlFunctions: WdlStandardLibraryFunctions): Try[Map[LocallyQualifiedName, WdlValue]] = { - import RuntimeAttributeDefinition.buildMapBasedLookup + def resolveAndEvaluateInputs(): Try[Map[Declaration, WdlValue]] = { Try { - val call = jobKey.call - lazy val callInputsFromFile = unqualifiedInputsFromInputFile(call) - lazy val workflowScopedLookup = hierarchicalLookup(jobKey.call, jobKey.index) _ - - // Try to resolve, evaluate and coerce declarations in order - val inputEvaluationAttempt = call.task.declarations.foldLeft(Map.empty[LocallyQualifiedName, Try[WdlValue]])((inputs, declaration) => { - val name = declaration.name - - // Try 
to resolve the declaration, and upon success evaluate the expression - // If the declaration is resolved but can't be evaluated this will throw an evaluation exception - // If it can't be resolved it's ignored and won't appear in the final input map - val evaluated: Option[Try[WdlValue]] = declaration.expression match { - // Static expression in the declaration - case Some(expr) => Option(expr.evaluate(buildMapBasedLookup(inputs), wdlFunctions)) - // Expression found in the input mappings - case None if call.inputMappings.contains(name) => Option(call.inputMappings(name).evaluate(workflowScopedLookup, wdlFunctions)) - // Expression found in the input file - case None if callInputsFromFile.contains(name) => Option(Success(callInputsFromFile(name))) - // Expression can't be found - case _ => None - } - - // Leave out unresolved declarations - evaluated match { - case Some(value) => - val coercedValue = value flatMap declaration.wdlType.coerceRawValue - inputs + ((name, coercedValue)) - case None => inputs - } - }) - - TryUtil.sequenceMap(inputEvaluationAttempt, s"Input evaluation for Call ${call.fullyQualifiedName} failed") - }.flatten + val call = callKey.scope + val scatterMap = callKey.index flatMap { i => + // Will need update for nested scatters + call.upstream collectFirst { case s: Scatter => Map(s -> i) } + } getOrElse Map.empty[Scatter, Int] + + call.evaluateTaskInputs( + workflowDescriptor.backendDescriptor.inputs, + expressionLanguageFunctions, + outputStore.fetchNodeOutputEntries, + scatterMap + ) + } } +} - // Unqualified call inputs for a specific call, from the input json - private def unqualifiedInputsFromInputFile(call: Call): Map[LocallyQualifiedName, WdlValue] = splitInputs collect { - case((root, inputName), v) if root == call.fullyQualifiedName => inputName -> v - } +final case class JobPreparationActor(executionData: WorkflowExecutionActorData, + jobKey: BackendJobDescriptorKey, + factory: BackendLifecycleActorFactory, + initializationData: 
Option[BackendInitializationData], + serviceRegistryActor: ActorRef, + backendSingletonActor: Option[ActorRef]) + extends CallPreparationActor(executionData.workflowDescriptor, executionData.outputStore, jobKey) { - private def prepareJobExecutionActor(inputEvaluation: Map[LocallyQualifiedName, WdlValue]): JobPreparationActorResponse = { + override lazy val expressionLanguageFunctions = factory.expressionLanguageFunctions(workflowDescriptor.backendDescriptor, jobKey, initializationData) + + override def prepareExecutionActor(inputEvaluation: Map[Declaration, WdlValue]): CallPreparationActorResponse = { import RuntimeAttributeDefinition.{addDefaultsToAttributes, evaluateRuntimeAttributes} val curriedAddDefaultsToAttributes = addDefaultsToAttributes(factory.runtimeAttributeDefinitions(initializationData), workflowDescriptor.backendDescriptor.workflowOptions) _ @@ -95,19 +68,45 @@ final case class JobPreparationActor(executionData: WorkflowExecutionActorData, jobDescriptor = BackendJobDescriptor(workflowDescriptor.backendDescriptor, jobKey, attributesWithDefault, inputEvaluation) } yield BackendJobPreparationSucceeded(jobDescriptor, factory.jobExecutionActorProps(jobDescriptor, initializationData, serviceRegistryActor, backendSingletonActor))) match { case Success(s) => s - case Failure(f) => BackendJobPreparationFailed(jobKey, f) + case Failure(f) => CallPreparationFailed(jobKey, f) } } } -object JobPreparationActor { - sealed trait JobPreparationActorCommands - case object Start extends JobPreparationActorCommands +final case class SubWorkflowPreparationActor(executionData: WorkflowExecutionActorData, + key: SubWorkflowKey, + subWorkflowId: WorkflowId) + extends CallPreparationActor(executionData.workflowDescriptor, executionData.outputStore, key) { + + override lazy val expressionLanguageFunctions = executionData.expressionLanguageFunctions + + override def prepareExecutionActor(inputEvaluation: Map[Declaration, WdlValue]): CallPreparationActorResponse = { + val 
oldBackendDescriptor = workflowDescriptor.backendDescriptor + + val newBackendDescriptor = oldBackendDescriptor.copy( + id = subWorkflowId, + workflow = key.scope.calledWorkflow, + inputs = workflowDescriptor.inputs ++ (inputEvaluation map { case (k, v) => k.fullyQualifiedName -> v }), + breadCrumbs = oldBackendDescriptor.breadCrumbs :+ BackendJobBreadCrumb(workflowDescriptor.workflow, workflowDescriptor.id, key) + ) + val engineDescriptor = workflowDescriptor.copy(backendDescriptor = newBackendDescriptor, parentWorkflow = Option(workflowDescriptor)) + SubWorkflowPreparationSucceeded(engineDescriptor, inputEvaluation) + } +} - sealed trait JobPreparationActorResponse - case class BackendJobPreparationSucceeded(jobDescriptor: BackendJobDescriptor, bjeaProps: Props) extends JobPreparationActorResponse - case class BackendJobPreparationFailed(jobKey: JobKey, throwable: Throwable) extends JobPreparationActorResponse +object CallPreparationActor { + sealed trait CallPreparationActorCommands + case object Start extends CallPreparationActorCommands + + sealed trait CallPreparationActorResponse + + case class BackendJobPreparationSucceeded(jobDescriptor: BackendJobDescriptor, bjeaProps: Props) extends CallPreparationActorResponse + case class SubWorkflowPreparationSucceeded(workflowDescriptor: EngineWorkflowDescriptor, inputs: EvaluatedTaskInputs) extends CallPreparationActorResponse + case class JobCallPreparationFailed(jobKey: JobKey, throwable: Throwable) extends CallPreparationActorResponse + case class CallPreparationFailed(jobKey: JobKey, throwable: Throwable) extends CallPreparationActorResponse +} +object JobPreparationActor { def props(executionData: WorkflowExecutionActorData, jobKey: BackendJobDescriptorKey, factory: BackendLifecycleActorFactory, @@ -119,3 +118,13 @@ object JobPreparationActor { Props(new JobPreparationActor(executionData, jobKey, factory, initializationData, serviceRegistryActor, backendSingletonActor)) } } + +object SubWorkflowPreparationActor 
{ + def props(executionData: WorkflowExecutionActorData, + key: SubWorkflowKey, + subWorkflowId: WorkflowId) = { + // Note that JobPreparationActor doesn't run on the engine dispatcher as it mostly executes backend-side code + // (WDL expression evaluation using Backend's expressionLanguageFunctions) + Props(new SubWorkflowPreparationActor(executionData, key, subWorkflowId)) + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/OutputStore.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/OutputStore.scala new file mode 100644 index 000000000..02c0bc113 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/OutputStore.scala @@ -0,0 +1,98 @@ +package cromwell.engine.workflow.lifecycle.execution + +import cromwell.core.ExecutionIndex._ +import cromwell.core._ +import cromwell.engine.workflow.lifecycle.execution.OutputStore.{OutputCallKey, OutputEntry} +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.CollectorKey +import wdl4s.types.{WdlArrayType, WdlType} +import wdl4s.util.TryUtil +import wdl4s.values.{WdlArray, WdlCallOutputsObject, WdlValue} +import wdl4s.{Call, Declaration, GraphNode, Scope} + +import scala.language.postfixOps +import scala.util.{Failure, Success, Try} + +object OutputStore { + case class OutputEntry(name: String, wdlType: WdlType, wdlValue: Option[WdlValue]) + case class OutputCallKey(call: Scope with GraphNode, index: ExecutionIndex) + def empty = OutputStore(Map.empty) +} + +case class OutputStore(store: Map[OutputCallKey, List[OutputEntry]]) { + def add(values: Map[OutputCallKey, List[OutputEntry]]) = this.copy(store = store ++ values) + + def fetchNodeOutputEntries(node: GraphNode, index: ExecutionIndex): Try[WdlValue] = { + def outputEntriesToMap(outputs: List[OutputEntry]): Map[String, Try[WdlValue]] = { + outputs map { output => + output.wdlValue match { + case Some(wdlValue) => output.name -> Success(wdlValue) + case None 
=> output.name -> Failure(new RuntimeException(s"Could not retrieve output ${output.name} value")) + } + } toMap + } + + def callOutputs(call: Call, outputs: List[OutputEntry]) = { + TryUtil.sequenceMap(outputEntriesToMap(outputs), s"Output fetching for call ${node.unqualifiedName}") map { outputsMap => + WdlCallOutputsObject(call, outputsMap) + } + } + + def declarationOutputs(declaration: Declaration, outputs: List[OutputEntry]) = { + outputs match { + case OutputEntry(name, _, Some(value)) :: Nil => Success(value) + case _ => Failure(new RuntimeException(s"Could not find value for declaration ${declaration.fullyQualifiedName}")) + } + } + + store.get(OutputCallKey(node, index)) match { + case Some(outputs) => + node match { + case call: Call => callOutputs(call, outputs) + case declaration: Declaration => declarationOutputs(declaration, outputs) + case other => Failure(new RuntimeException(s"Only Calls and Declarations are allowed in the OutputStore, found ${other.getClass.getSimpleName}")) + } + case None => Failure(new RuntimeException(s"Could not find scope ${node.unqualifiedName}")) + } + } + + def collectCall(call: Call, sortedShards: Seq[JobKey]) = Try { + val shardsOutputs = sortedShards map { e => + fetchNodeOutputEntries(call, e.index) map { + case callOutputs: WdlCallOutputsObject => callOutputs.outputs + case _ => throw new RuntimeException("Call outputs should be a WdlCallOutputsObject") + } getOrElse(throw new RuntimeException(s"Could not retrieve output for shard ${e.scope} #${e.index}")) + } + + call.outputs map { taskOutput => + val wdlValues = shardsOutputs.map( + _.getOrElse(taskOutput.unqualifiedName, throw new RuntimeException(s"Could not retrieve output ${taskOutput.unqualifiedName}"))) + val arrayOfValues = new WdlArray(WdlArrayType(taskOutput.wdlType), wdlValues) + taskOutput.unqualifiedName -> JobOutput(arrayOfValues) + } toMap + } + + def collectDeclaration(declaration: Declaration, sortedShards: Seq[JobKey]) = Try { + val shardsOutputs 
= sortedShards map { e => + fetchNodeOutputEntries(declaration, e.index) getOrElse { + throw new RuntimeException(s"Could not retrieve output for shard ${e.scope} #${e.index}") + } + } + + Map(declaration.unqualifiedName -> JobOutput(WdlArray(WdlArrayType(declaration.wdlType), shardsOutputs))) + } + + /** + * Try to generate output for a collector call, by collecting outputs for all of its shards. + * It's fail-fast on shard output retrieval + */ + def generateCollectorOutput(collector: CollectorKey, + shards: Iterable[JobKey]): Try[CallOutputs] = { + lazy val sortedShards = shards.toSeq sortBy { _.index.fromIndex } + + collector.scope match { + case call: Call => collectCall(call, sortedShards) + case declaration: Declaration => collectDeclaration(declaration, sortedShards) + case other => Failure(new RuntimeException(s"Cannot retrieve outputs for ${other.fullyQualifiedName}")) + } + } +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala new file mode 100644 index 000000000..ffac77610 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActor.scala @@ -0,0 +1,275 @@ +package cromwell.engine.workflow.lifecycle.execution + +import akka.actor.SupervisorStrategy.Escalate +import akka.actor.{ActorRef, FSM, LoggingFSM, OneForOneStrategy, Props, SupervisorStrategy} +import cromwell.backend.{AllBackendInitializationData, BackendLifecycleActorFactory, BackendWorkflowDescriptor} +import cromwell.core._ +import cromwell.core.logging.JobLogging +import cromwell.engine.EngineWorkflowDescriptor +import cromwell.engine.backend.{BackendConfiguration, BackendSingletonCollection} +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor.{CallPreparationFailed, Start, SubWorkflowPreparationSucceeded} +import 
cromwell.engine.workflow.lifecycle.execution.SubWorkflowExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ +import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata._ +import cromwell.subworkflowstore.SubWorkflowStoreActor._ +import wdl4s.EvaluatedTaskInputs + +class SubWorkflowExecutionActor(key: SubWorkflowKey, + data: WorkflowExecutionActorData, + factories: Map[String, BackendLifecycleActorFactory], + override val serviceRegistryActor: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + initializationData: AllBackendInitializationData, + restarting: Boolean) extends LoggingFSM[SubWorkflowExecutionActorState, SubWorkflowExecutionActorData] with JobLogging with WorkflowMetadataHelper with CallMetadataHelper { + + override def supervisorStrategy: SupervisorStrategy = OneForOneStrategy() { case _ => Escalate } + + private val parentWorkflow = data.workflowDescriptor + override val workflowId = parentWorkflow.id + override val workflowIdForCallMetadata = parentWorkflow.id + override def jobTag: String = key.tag + + startWith(SubWorkflowPendingState, SubWorkflowExecutionActorData.empty) + + private var eventList: Seq[ExecutionEvent] = Seq(ExecutionEvent(stateName.toString)) + + when(SubWorkflowPendingState) { + case Event(Execute, _) => + if (restarting) { + subWorkflowStoreActor ! 
QuerySubWorkflow(parentWorkflow.id, key) + goto(SubWorkflowCheckingStoreState) + } else { + prepareSubWorkflow(createSubWorkflowId()) + } + } + + when(SubWorkflowCheckingStoreState) { + case Event(SubWorkflowFound(entry), _) => + prepareSubWorkflow(WorkflowId.fromString(entry.subWorkflowExecutionUuid)) + case Event(_: SubWorkflowNotFound, _) => + prepareSubWorkflow(createSubWorkflowId()) + case Event(SubWorkflowStoreFailure(command, reason), _) => + jobLogger.error(reason, s"SubWorkflowStore failure for command $command, starting sub workflow with fresh ID.") + prepareSubWorkflow(createSubWorkflowId()) + } + + when(SubWorkflowPreparingState) { + case Event(SubWorkflowPreparationSucceeded(subWorkflowEngineDescriptor, inputs), _) => + startSubWorkflow(subWorkflowEngineDescriptor, inputs) + case Event(failure: CallPreparationFailed, _) => + context.parent ! SubWorkflowFailedResponse(key, Map.empty, failure.throwable) + context stop self + stay() + } + + when(SubWorkflowRunningState) { + case Event(WorkflowExecutionSucceededResponse(executedJobKeys, outputs), _) => + context.parent ! SubWorkflowSucceededResponse(key, executedJobKeys, outputs) + goto(SubWorkflowSucceededState) + case Event(WorkflowExecutionFailedResponse(executedJobKeys, reason), _) => + context.parent ! SubWorkflowFailedResponse(key, executedJobKeys, reason) + goto(SubWorkflowFailedState) + case Event(WorkflowExecutionAbortedResponse(executedJobKeys), _) => + context.parent ! 
SubWorkflowAbortedResponse(key, executedJobKeys) + goto(SubWorkflowAbortedState) + } + + when(SubWorkflowSucceededState) { FSM.NullFunction } + when(SubWorkflowFailedState) { FSM.NullFunction } + when(SubWorkflowAbortedState) { FSM.NullFunction } + + whenUnhandled { + case Event(SubWorkflowStoreRegisterSuccess(command), _) => + // Nothing to do here + stay() + case Event(SubWorkflowStoreCompleteSuccess(command), _) => + // Nothing to do here + stay() + case Event(SubWorkflowStoreFailure(command, reason), _) => + jobLogger.error(reason, s"SubWorkflowStore failure for command $command") + stay() + case Event(MetadataPutFailed(action, error), _) => + jobLogger.warn(s"Put failed for Metadata action $action", error) + stay() + case Event(MetadataPutAcknowledgement(_), _) => stay() + } + + onTransition { + case (fromState, toState) => + stateData.subWorkflowId foreach { id => pushCurrentStateToMetadataService(id, toState.workflowState) } + } + + onTransition { + case (fromState, subWorkflowTerminalState: SubWorkflowTerminalState) => + stateData.subWorkflowId match { + case Some(id) => + pushWorkflowEnd(id) + pushExecutionEventsToMetadataService(key, eventList) + case None => jobLogger.error("Sub workflow completed without a Sub Workflow UUID.") + } + context stop self + } + + onTransition { + case fromState -> toState => eventList :+= ExecutionEvent(toState.toString) + } + + private def startSubWorkflow(subWorkflowEngineDescriptor: EngineWorkflowDescriptor, inputs: EvaluatedTaskInputs) = { + val subWorkflowActor = createSubWorkflowActor(subWorkflowEngineDescriptor) + + subWorkflowActor ! WorkflowExecutionActor.ExecuteWorkflowCommand + context.parent ! JobRunning(key, inputs, Option(subWorkflowActor)) + pushWorkflowRunningMetadata(subWorkflowEngineDescriptor.backendDescriptor, inputs) + + goto(SubWorkflowRunningState) + } + + private def prepareSubWorkflow(subWorkflowId: WorkflowId) = { + createSubWorkflowPreparationActor(subWorkflowId) ! Start + context.parent ! 
JobStarting(key) + pushCurrentStateToMetadataService(subWorkflowId, WorkflowRunning) + pushWorkflowStart(subWorkflowId) + goto(SubWorkflowPreparingState) using SubWorkflowExecutionActorData(Option(subWorkflowId)) + } + + def createSubWorkflowPreparationActor(subWorkflowId: WorkflowId) = { + context.actorOf( + SubWorkflowPreparationActor.props(data, key, subWorkflowId), + s"$subWorkflowId-SubWorkflowPreparationActor-${key.tag}" + ) + } + + def createSubWorkflowActor(subWorkflowEngineDescriptor: EngineWorkflowDescriptor) = { + context.actorOf( + WorkflowExecutionActor.props( + subWorkflowEngineDescriptor, + serviceRegistryActor, + jobStoreActor, + subWorkflowStoreActor, + callCacheReadActor, + jobTokenDispenserActor, + backendSingletonCollection, + initializationData, + restarting + ), + s"${subWorkflowEngineDescriptor.id}-SubWorkflowActor-${key.tag}" + ) + } + + private def pushWorkflowRunningMetadata(subWorkflowDescriptor: BackendWorkflowDescriptor, workflowInputs: EvaluatedTaskInputs) = { + val subWorkflowId = subWorkflowDescriptor.id + val parentWorkflowMetadataKey = MetadataKey(parentWorkflow.id, Option(MetadataJobKey(key.scope.fullyQualifiedName, key.index, key.attempt)), CallMetadataKeys.SubWorkflowId) + + val events = List( + MetadataEvent(parentWorkflowMetadataKey, MetadataValue(subWorkflowId)), + MetadataEvent(MetadataKey(subWorkflowId, None, WorkflowMetadataKeys.Name), MetadataValue(key.scope.callable.unqualifiedName)), + MetadataEvent(MetadataKey(subWorkflowId, None, WorkflowMetadataKeys.ParentWorkflowId), MetadataValue(parentWorkflow.id)) + ) + + val inputEvents = workflowInputs match { + case empty if empty.isEmpty => + List(MetadataEvent.empty(MetadataKey(subWorkflowId, None,WorkflowMetadataKeys.Inputs))) + case inputs => + inputs flatMap { case (inputName, wdlValue) => + wdlValueToMetadataEvents(MetadataKey(subWorkflowId, None, s"${WorkflowMetadataKeys.Inputs}:${inputName.unqualifiedName}"), wdlValue) + } + } + + val workflowRootEvents = 
buildWorkflowRootMetadataEvents(subWorkflowDescriptor) + + serviceRegistryActor ! PutMetadataAction(events ++ inputEvents ++ workflowRootEvents) + } + + private def buildWorkflowRootMetadataEvents(subWorkflowDescriptor: BackendWorkflowDescriptor) = { + val subWorkflowId = subWorkflowDescriptor.id + + factories flatMap { + case (backendName, factory) => + BackendConfiguration.backendConfigurationDescriptor(backendName).toOption map { config => + backendName -> factory.getWorkflowExecutionRootPath(subWorkflowDescriptor, config.backendConfig, initializationData.get(backendName)) + } + } map { + case (backend, wfRoot) => + MetadataEvent(MetadataKey(subWorkflowId, None, s"${WorkflowMetadataKeys.WorkflowRoot}[$backend]"), MetadataValue(wfRoot.toAbsolutePath)) + } + } + + private def createSubWorkflowId() = { + val subWorkflowId = WorkflowId.randomId() + // Register ID to the sub workflow store + subWorkflowStoreActor ! RegisterSubWorkflow(parentWorkflow.rootWorkflow.id, parentWorkflow.id, key, subWorkflowId) + subWorkflowId + } +} + +object SubWorkflowExecutionActor { + sealed trait SubWorkflowExecutionActorState { + def workflowState: WorkflowState + } + sealed trait SubWorkflowTerminalState extends SubWorkflowExecutionActorState + + case object SubWorkflowPendingState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowRunning + } + case object SubWorkflowCheckingStoreState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowRunning + } + case object SubWorkflowPreparingState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowRunning + } + case object SubWorkflowRunningState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowRunning + } + case object SubWorkflowAbortingState extends SubWorkflowExecutionActorState { + override val workflowState = WorkflowAborting + } + + case object SubWorkflowSucceededState extends SubWorkflowTerminalState { + override val 
workflowState = WorkflowSucceeded + } + case object SubWorkflowAbortedState extends SubWorkflowTerminalState { + override val workflowState = WorkflowAborted + } + case object SubWorkflowFailedState extends SubWorkflowTerminalState { + override val workflowState = WorkflowFailed + } + + object SubWorkflowExecutionActorData { + def empty = SubWorkflowExecutionActorData(None) + } + case class SubWorkflowExecutionActorData(subWorkflowId: Option[WorkflowId]) + + sealed trait EngineWorkflowExecutionActorCommand + case object Execute + + def props(key: SubWorkflowKey, + data: WorkflowExecutionActorData, + factories: Map[String, BackendLifecycleActorFactory], + serviceRegistryActor: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + initializationData: AllBackendInitializationData, + restarting: Boolean) = { + Props(new SubWorkflowExecutionActor( + key, + data, + factories, + serviceRegistryActor, + jobStoreActor, + subWorkflowStoreActor, + callCacheReadActor, + jobTokenDispenserActor, + backendSingletonCollection, + initializationData, + restarting) + ) + } +} \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WdlLookup.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WdlLookup.scala deleted file mode 100644 index 8b2af57ea..000000000 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WdlLookup.scala +++ /dev/null @@ -1,106 +0,0 @@ -package cromwell.engine.workflow.lifecycle.execution - -import cromwell.core.{ExecutionIndex, ExecutionStore, OutputStore} -import cromwell.engine.EngineWorkflowDescriptor -import ExecutionIndex._ -import wdl4s._ -import wdl4s.expression.WdlStandardLibraryFunctions -import wdl4s.values.{WdlArray, WdlCallOutputsObject, WdlValue} - -import scala.language.postfixOps -import scala.util.{Failure, 
Success, Try} - -trait WdlLookup { - - def workflowDescriptor: EngineWorkflowDescriptor - def executionStore: ExecutionStore - def outputStore: OutputStore - def expressionLanguageFunctions: WdlStandardLibraryFunctions - - private lazy val splitInputs = workflowDescriptor.backendDescriptor.inputs map { - case (fqn, v) => splitFqn(fqn) -> v - } - - // Unqualified workflow level inputs - private lazy val unqualifiedWorkflowInputs: Map[LocallyQualifiedName, WdlValue] = splitInputs collect { - case((root, inputName), v) if root == workflowDescriptor.namespace.workflow.unqualifiedName => inputName -> v - } - - /** - * Lookup an identifier by - * first looking at the completed calls map - * and if not found traversing up the scope hierarchy from the scope from which the lookup originated. - */ - def hierarchicalLookup(scope: Scope, index: ExecutionIndex)(identifier: String): WdlValue = { - // First lookup calls - lookupCall(scope, index, identifier) recoverWith { - // Lookup in the same scope (currently no scope support this but say we have scatter declarations, or multiple scatter variables, or nested workflows..) 
- case _: VariableNotFoundException | _: WdlExpressionException => scopedLookup(scope, index, identifier) - } recover { - // Lookup parent if present - case _: VariableNotFoundException | _: WdlExpressionException => scope.parent match { - case Some(parent) => hierarchicalLookup(parent, index)(identifier) - case None => throw new VariableNotFoundException(s"Can't find $identifier") - } - } get - } - - private def scopedLookup(scope: Scope, index: ExecutionIndex, identifier: String): Try[WdlValue] = { - def scopedLookupFunction = scope match { - case scatter: Scatter if index.isDefined => lookupScatter(scatter, index.get) _ - case workflow: Workflow => lookupWorkflowDeclaration _ - case _ => (_: String) => Failure(new VariableNotFoundException(s"Can't find $identifier in scope $scope")) - } - - scopedLookupFunction(identifier) - } - - // In this case, the scopedLookup function is effectively equivalent to looking into unqualifiedWorkflowInputs for the value - // because the resolution / evaluation / coercion has already happened in the MaterializeWorkflowDescriptorActor - private def lookupWorkflowDeclaration(identifier: String) = { - unqualifiedWorkflowInputs.get(identifier) match { - case Some(value) => Success(value) - case None => Failure(new WdlExpressionException(s"Could not resolve variable $identifier as a workflow input")) - } - } - - private def lookupScatter(scatter: Scatter, index: Int)(identifier: String): Try[WdlValue] = { - if (identifier == scatter.item) { - // Scatters are not indexed yet (they can't be nested) - val scatterLookup = hierarchicalLookup(scatter, None) _ - scatter.collection.evaluate(scatterLookup, expressionLanguageFunctions) map { - case collection: WdlArray if collection.value.isDefinedAt(index) => collection.value(index) - case collection: WdlArray => throw new RuntimeException(s"Index $index out of bound in $collection for scatter ${scatter.fullyQualifiedName}") - case other => throw new RuntimeException(s"Scatter 
${scatter.fullyQualifiedName} collection is not an array: $other") - } recover { - case e => throw new RuntimeException(s"Failed to evaluate collection for scatter ${scatter.fullyQualifiedName}", e) - } - } else { - Failure(new VariableNotFoundException(identifier)) - } - } - - private def lookupCall(scope: Scope, scopeIndex: ExecutionIndex, identifier: String): Try[WdlCallOutputsObject] = { - val calls = executionStore.store.keys.view map { _.scope } collect { case c: Call => c } - - calls find { _.unqualifiedName == identifier } match { - case Some(matchedCall) => - /** - * After matching the Call, this determines if the `key` depends on a single shard - * of a scatter'd job or if it depends on the whole thing. Right now, the heuristic - * is "If we're both in a scatter block together, then I depend on a shard. If not, - * I depend on the collected value" - * - * TODO: nested-scatter - this will likely not be sufficient for nested scatters - */ - val index: ExecutionIndex = matchedCall.closestCommonAncestor(scope) flatMap { - case s: Scatter => scopeIndex - case _ => None - } - - outputStore.fetchCallOutputEntries(matchedCall, index) - case None => Failure(new WdlExpressionException(s"Could not find a call with identifier '$identifier'")) - } - } - -} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala index b7358296b..10f0bf377 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActor.scala @@ -1,316 +1,76 @@ package cromwell.engine.workflow.lifecycle.execution -import java.time.OffsetDateTime - -import akka.actor.SupervisorStrategy.{Escalate, Stop} import akka.actor._ import cats.data.NonEmptyList import com.typesafe.config.ConfigFactory -import 
cromwell.backend.BackendJobExecutionActor.{AbortedResponse, FailedNonRetryableResponse, FailedRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, JobFailedNonRetryableResponse, JobFailedRetryableResponse, JobSucceededResponse} import cromwell.backend.BackendLifecycleActor.AbortJobCommand -import cromwell.backend.{AllBackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey} -import cromwell.core.Dispatcher.EngineDispatcher +import cromwell.backend.{AllBackendInitializationData, BackendJobDescriptorKey, JobExecutionMap} +import cromwell.core.Dispatcher._ import cromwell.core.ExecutionIndex._ import cromwell.core.ExecutionStatus._ -import cromwell.core.ExecutionStore.ExecutionStoreEntry -import cromwell.core.OutputStore.OutputEntry import cromwell.core.WorkflowOptions.WorkflowFailureMode import cromwell.core._ import cromwell.core.logging.WorkflowLogging import cromwell.engine.backend.{BackendSingletonCollection, CromwellBackends} -import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.JobRunning -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor.BackendJobPreparationFailed -import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.WorkflowExecutionActorState +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{apply => _, _} import cromwell.engine.workflow.lifecycle.{EngineLifecycleActorAbortCommand, EngineLifecycleActorAbortedResponse} import cromwell.engine.{ContinueWhilePossible, EngineWorkflowDescriptor} -import cromwell.services.metadata.MetadataService._ -import cromwell.services.metadata._ +import cromwell.services.metadata.MetadataService.{MetadataPutAcknowledgement, MetadataPutFailed} +import cromwell.util.{StopAndLogSupervisor, TryUtil} import cromwell.webservice.EngineStatsActor import lenthall.exception.ThrowableAggregation import net.ceedubs.ficus.Ficus._ -import wdl4s.types.WdlArrayType -import 
wdl4s.util.TryUtil import wdl4s.values.{WdlArray, WdlValue} import wdl4s.{Scope, _} import scala.annotation.tailrec import scala.language.postfixOps -import scala.util.{Failure, Random, Success, Try} - -object WorkflowExecutionActor { - - /** - * States - */ - sealed trait WorkflowExecutionActorState { def terminal = false } - sealed trait WorkflowExecutionActorTerminalState extends WorkflowExecutionActorState { override val terminal = true } - - case object WorkflowExecutionPendingState extends WorkflowExecutionActorState - case object WorkflowExecutionInProgressState extends WorkflowExecutionActorState - case object WorkflowExecutionAbortingState extends WorkflowExecutionActorState - case object WorkflowExecutionSuccessfulState extends WorkflowExecutionActorTerminalState - case object WorkflowExecutionFailedState extends WorkflowExecutionActorTerminalState - case object WorkflowExecutionAbortedState extends WorkflowExecutionActorTerminalState - - /** - * Commands - */ - sealed trait WorkflowExecutionActorCommand - case object ExecuteWorkflowCommand extends WorkflowExecutionActorCommand - case object RestartExecutingWorkflowCommand extends WorkflowExecutionActorCommand - - /** - * Responses - */ - sealed trait WorkflowExecutionActorResponse { - def executionStore: ExecutionStore - - def outputStore: OutputStore - } - - case class WorkflowExecutionSucceededResponse(executionStore: ExecutionStore, outputStore: OutputStore) - extends WorkflowExecutionActorResponse { - override def toString = "WorkflowExecutionSucceededResponse" - } - - case class WorkflowExecutionAbortedResponse(executionStore: ExecutionStore, outputStore: OutputStore) - extends WorkflowExecutionActorResponse with EngineLifecycleActorAbortedResponse { - override def toString = "WorkflowExecutionAbortedResponse" - } - - final case class WorkflowExecutionFailedResponse(executionStore: ExecutionStore, outputStore: OutputStore, - reasons: Seq[Throwable]) extends WorkflowExecutionActorResponse { - 
override def toString = "WorkflowExecutionFailedResponse" - } - - /** - * Internal control flow messages - */ - private case class JobInitializationFailed(jobKey: JobKey, throwable: Throwable) - private case class ScatterCollectionFailedResponse(collectorKey: CollectorKey, throwable: Throwable) - private case class ScatterCollectionSucceededResponse(collectorKey: CollectorKey, outputs: JobOutputs) - - /** - * Internal ADTs - */ - case class ScatterKey(scope: Scatter) extends JobKey { - override val index = None // When scatters are nested, this might become Some(_) - override val attempt = 1 - override val tag = scope.unqualifiedName - - /** - * Creates a sub-ExecutionStore with Starting entries for each of the scoped children. - * - * @param count Number of ways to scatter the children. - * @return ExecutionStore of scattered children. - */ - def populate(count: Int): Map[JobKey, ExecutionStatus.Value] = { - val keys = this.scope.children flatMap { explode(_, count) } - keys map { _ -> ExecutionStatus.NotStarted } toMap - } - - private def explode(scope: Scope, count: Int): Seq[JobKey] = { - scope match { - case call: Call => - val shards = (0 until count) map { i => BackendJobDescriptorKey(call, Option(i), 1) } - shards :+ CollectorKey(call) - case scatter: Scatter => - throw new UnsupportedOperationException("Nested Scatters are not supported (yet).") - case e => - throw new UnsupportedOperationException(s"Scope ${e.getClass.getName} is not supported.") - } - } - } - - // Represents a scatter collection for a call in the execution store - case class CollectorKey(scope: Call) extends JobKey { - override val index = None - override val attempt = 1 - override val tag = s"Collector-${scope.unqualifiedName}" - } - - case class WorkflowExecutionException[T <: Throwable](exceptions: NonEmptyList[T]) extends ThrowableAggregation { - override val throwables = exceptions.toList - override val exceptionContext = s"WorkflowExecutionActor" - } - - def props(workflowId: 
WorkflowId, - workflowDescriptor: EngineWorkflowDescriptor, - serviceRegistryActor: ActorRef, - jobStoreActor: ActorRef, - callCacheReadActor: ActorRef, - jobTokenDispenserActor: ActorRef, - backendSingletonCollection: BackendSingletonCollection, - initializationData: AllBackendInitializationData, - restarting: Boolean): Props = { - Props(WorkflowExecutionActor(workflowId, workflowDescriptor, serviceRegistryActor, jobStoreActor, - callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection, initializationData, restarting)).withDispatcher(EngineDispatcher) - } - - implicit class EnhancedExecutionStore(val executionStore: ExecutionStore) extends AnyVal { - // Convert the store to a `List` before `collect`ing to sidestep expensive and pointless hashing of `Scope`s when - // assembling the result. - def runnableScopes = executionStore.store.toList collect { case entry if isRunnable(entry) => entry._1 } - - private def isRunnable(entry: ExecutionStoreEntry) = { - entry match { - case (key, ExecutionStatus.NotStarted) => arePrerequisitesDone(key) - case _ => false - } - } - - def findShardEntries(key: CollectorKey): List[ExecutionStoreEntry] = executionStore.store.toList collect { - case (k: BackendJobDescriptorKey, v) if k.scope == key.scope && k.isShard => (k, v) - } - - private def arePrerequisitesDone(key: JobKey): Boolean = { - val upstream = key.scope.prerequisiteScopes.toList.map(s => upstreamEntries(key, s)) - val downstream = key match { - case collector: CollectorKey => findShardEntries(collector) - case _ => Nil - } - - val dependencies = upstream.flatten ++ downstream - val dependenciesResolved = dependencies forall { case (_, s) => s == ExecutionStatus.Done } - - /** - * We need to make sure that all prerequisiteScopes have been resolved to some entry before going forward. - * If a scope cannot be resolved it may be because it is in a scatter that has not been populated yet, - * therefore there is no entry in the executionStore for this scope. 
- * If that's the case this prerequisiteScope has not been run yet, hence the (upstream forall {_.nonEmpty}) - */ - (upstream forall { _.nonEmpty }) && dependenciesResolved - } - - private def upstreamEntries(entry: JobKey, prerequisiteScope: Scope): Seq[ExecutionStoreEntry] = { - prerequisiteScope.closestCommonAncestor(entry.scope) match { - /** - * If this entry refers to a Scope which has a common ancestor with prerequisiteScope - * and that common ancestor is a Scatter block, then find the shard with the same index - * as 'entry'. In other words, if you're in the same scatter block as your pre-requisite - * scope, then depend on the shard (with same index). - * - * NOTE: this algorithm was designed for ONE-LEVEL of scattering and probably does not - * work as-is for nested scatter blocks - */ - case Some(ancestor: Scatter) => - executionStore.store filter { - case (k, _) => k.scope == prerequisiteScope && k.index == entry.index - } toSeq - - /** - * Otherwise, simply refer to the entry the collector entry. This means that 'entry' depends - * on every shard of the pre-requisite scope to finish. - */ - case _ => - executionStore.store filter { - case (k, _) => k.scope == prerequisiteScope && k.index.isEmpty - } toSeq - } - } - } - - implicit class EnhancedOutputStore(val outputStore: OutputStore) extends AnyVal { - /** - * Try to generate output for a collector call, by collecting outputs for all of its shards. 
- * It's fail-fast on shard output retrieval - */ - def generateCollectorOutput(collector: CollectorKey, - shards: Iterable[BackendJobDescriptorKey]): Try[JobOutputs] = Try { - val shardsOutputs = shards.toSeq sortBy { _.index.fromIndex } map { e => - outputStore.fetchCallOutputEntries(e.scope, e.index) map { - _.outputs - } getOrElse(throw new RuntimeException(s"Could not retrieve output for shard ${e.scope} #${e.index}")) - } - collector.scope.task.outputs map { taskOutput => - val wdlValues = shardsOutputs.map( - _.getOrElse(taskOutput.name, throw new RuntimeException(s"Could not retrieve output ${taskOutput.name}"))) - val arrayOfValues = new WdlArray(WdlArrayType(taskOutput.wdlType), wdlValues) - taskOutput.name -> JobOutput(arrayOfValues) - } toMap - } - } -} - -final case class WorkflowExecutionActor(workflowId: WorkflowId, - workflowDescriptor: EngineWorkflowDescriptor, - serviceRegistryActor: ActorRef, - jobStoreActor: ActorRef, - callCacheReadActor: ActorRef, - jobTokenDispenserActor: ActorRef, - backendSingletonCollection: BackendSingletonCollection, - initializationData: AllBackendInitializationData, - restarting: Boolean) - extends LoggingFSM[WorkflowExecutionActorState, WorkflowExecutionActorData] with WorkflowLogging { - - import WorkflowExecutionActor._ - - override def supervisorStrategy = AllForOneStrategy() { - case ex: ActorInitializationException => - context.parent ! 
WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(ex)) - context.stop(self) - Stop - case t => super.supervisorStrategy.decider.applyOrElse(t, (_: Any) => Escalate) - } - - val tag = s"WorkflowExecutionActor [UUID(${workflowId.shortString})]" - private lazy val DefaultMaxRetriesFallbackValue = 10 - +import scala.util.{Failure, Success, Try} + +case class WorkflowExecutionActor(workflowDescriptor: EngineWorkflowDescriptor, + serviceRegistryActor: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + initializationData: AllBackendInitializationData, + restarting: Boolean) + extends LoggingFSM[WorkflowExecutionActorState, WorkflowExecutionActorData] with WorkflowLogging with CallMetadataHelper with StopAndLogSupervisor { + implicit val ec = context.dispatcher + + override val workflowIdForLogging = workflowDescriptor.id + override val workflowIdForCallMetadata = workflowDescriptor.id - val MaxRetries = ConfigFactory.load().as[Option[Int]]("system.max-retries") match { + private val tag = s"WorkflowExecutionActor [UUID(${workflowDescriptor.id.shortString})]" + private val MaxRetries = ConfigFactory.load().as[Option[Int]]("system.max-retries") match { case Some(value) => value case None => workflowLogger.warn(s"Failed to load the max-retries value from the configuration. 
Defaulting back to a value of '$DefaultMaxRetriesFallbackValue'.") DefaultMaxRetriesFallbackValue } - - private val factories = TryUtil.sequenceMap(workflowDescriptor.backendAssignments.values.toSet[String] map { backendName => + + private val backendFactories = TryUtil.sequenceMap(workflowDescriptor.backendAssignments.values.toSet[String] map { backendName => backendName -> CromwellBackends.backendLifecycleFactoryActorByName(backendName) } toMap) recover { case e => throw new RuntimeException("Could not instantiate backend factories", e) } get - // Initialize the StateData with ExecutionStore (all calls as NotStarted) and SymbolStore startWith( WorkflowExecutionPendingState, WorkflowExecutionActorData( workflowDescriptor, - executionStore = buildInitialExecutionStore(), + executionStore = ExecutionStore(workflowDescriptor.backendDescriptor.workflow, workflowDescriptor.inputs), backendJobExecutionActors = Map.empty, + engineCallExecutionActors = Map.empty, + subWorkflowExecutionActors = Map.empty, + downstreamExecutionMap = Map.empty, outputStore = OutputStore.empty ) ) - private def buildInitialExecutionStore(): ExecutionStore = { - val workflow = workflowDescriptor.backendDescriptor.workflowNamespace.workflow - // Only add direct children to the store, the rest is dynamically created when necessary - val keys = workflow.children map { - case call: Call => BackendJobDescriptorKey(call, None, 1) - case scatter: Scatter => ScatterKey(scatter) - } - - ExecutionStore(keys.map(_ -> NotStarted).toMap) - } - - private def handleNonRetryableFailure(stateData: WorkflowExecutionActorData, failedJobKey: JobKey, reason: Throwable) = { - val mergedStateData = stateData.mergeExecutionDiff(WorkflowExecutionDiff(Map(failedJobKey -> ExecutionStatus.Failed))) - .removeBackendJobExecutionActor(failedJobKey) - - if (workflowDescriptor.getWorkflowOption(WorkflowFailureMode).contains(ContinueWhilePossible.toString)) { - mergedStateData.workflowCompletionStatus match { - case 
Some(completionStatus) if completionStatus == Failed => - context.parent ! WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(reason)) - goto(WorkflowExecutionFailedState) using mergedStateData - case _ => - stay() using startRunnableScopes(mergedStateData) - } - } else { - context.parent ! WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(reason)) - goto(WorkflowExecutionFailedState) using mergedStateData - } - } - when(WorkflowExecutionPendingState) { case Event(ExecuteWorkflowCommand, stateData) => val data = startRunnableScopes(stateData) @@ -318,32 +78,64 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, } when(WorkflowExecutionInProgressState) { - case Event(JobRunning(jobDescriptor, backendJobExecutionActor), stateData) => - pushRunningJobMetadata(jobDescriptor) + case Event(JobStarting(jobKey), stateData) => + pushStartingCallMetadata(jobKey) stay() using stateData - .addBackendJobExecutionActor(jobDescriptor.key, backendJobExecutionActor) - .mergeExecutionDiff(WorkflowExecutionDiff(Map(jobDescriptor.key -> ExecutionStatus.Running))) - case Event(BackendJobPreparationFailed(jobKey, throwable), stateData) => - pushFailedJobMetadata(jobKey, None, throwable, retryableFailure = false) - context.parent ! 
WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(throwable)) - goto(WorkflowExecutionFailedState) using stateData.mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Failed))) - case Event(SucceededResponse(jobKey, returnCode, callOutputs, _, _), stateData) => - pushSuccessfulJobMetadata(jobKey, returnCode, callOutputs) - handleJobSuccessful(jobKey, callOutputs, stateData) - case Event(FailedNonRetryableResponse(jobKey, reason, returnCode), stateData) => - pushFailedJobMetadata(jobKey, returnCode, reason, retryableFailure = false) - handleNonRetryableFailure(stateData, jobKey, reason) - case Event(FailedRetryableResponse(jobKey, reason, returnCode), stateData) => - workflowLogger.warn(s"Job ${jobKey.tag} failed with a retryable failure: ${reason.getMessage}") - pushFailedJobMetadata(jobKey, None, reason, retryableFailure = true) - handleRetryableFailure(jobKey, reason, returnCode) - case Event(JobInitializationFailed(jobKey, reason), stateData) => - pushFailedJobMetadata(jobKey, None, reason, retryableFailure = false) - handleNonRetryableFailure(stateData, jobKey, reason) + .mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Starting))) + case Event(JobRunning(key, inputs, callExecutionActor), stateData) => + pushRunningCallMetadata(key, inputs) + stay() using stateData + .addCallExecutionActor(key, callExecutionActor) + .mergeExecutionDiff(WorkflowExecutionDiff(Map(key -> ExecutionStatus.Running))) + + //Success + // Job + case Event(JobSucceededResponse(jobKey, returnCode, callOutputs, _, _), stateData) => + pushSuccessfulCallMetadata(jobKey, returnCode, callOutputs) + handleCallSuccessful(jobKey, callOutputs, stateData, Map.empty) + // Sub Workflow + case Event(SubWorkflowSucceededResponse(jobKey, descendantJobKeys, callOutputs), stateData) => + pushSuccessfulCallMetadata(jobKey, None, callOutputs) + handleCallSuccessful(jobKey, callOutputs, stateData, descendantJobKeys) + // Scatter 
case Event(ScatterCollectionSucceededResponse(jobKey, callOutputs), stateData) => - handleJobSuccessful(jobKey, callOutputs, stateData) + handleCallSuccessful(jobKey, callOutputs, stateData, Map.empty) + // Declaration + case Event(DeclarationEvaluationSucceededResponse(jobKey, callOutputs), stateData) => + handleDeclarationEvaluationSuccessful(jobKey, callOutputs, stateData) + + // Failure + // Initialization + case Event(JobInitializationFailed(jobKey, reason), stateData) => + pushFailedCallMetadata(jobKey, None, reason, retryableFailure = false) + handleNonRetryableFailure(stateData, jobKey, reason, Map.empty) + // Job Non Retryable + case Event(JobFailedNonRetryableResponse(jobKey, reason, returnCode), stateData) => + pushFailedCallMetadata(jobKey, returnCode, reason, retryableFailure = false) + handleNonRetryableFailure(stateData, jobKey, reason, Map.empty) + // Job Retryable + case Event(JobFailedRetryableResponse(jobKey, reason, returnCode), stateData) => + pushFailedCallMetadata(jobKey, None, reason, retryableFailure = true) + handleRetryableFailure(jobKey, reason, returnCode) + // Sub Workflow - sub workflow failures are always non retryable + case Event(SubWorkflowFailedResponse(jobKey, descendantJobKeys, reason), stateData) => + pushFailedCallMetadata(jobKey, None, reason, retryableFailure = false) + handleNonRetryableFailure(stateData, jobKey, reason, descendantJobKeys) + case Event(DeclarationEvaluationFailedResponse(jobKey, reason), stateData) => + handleDeclarationEvaluationFailure(jobKey, reason, stateData) } + when(WorkflowExecutionAbortingState) { + case Event(AbortedResponse(jobKey), stateData) => + handleCallAborted(stateData, jobKey, Map.empty) + case Event(SubWorkflowAbortedResponse(jobKey, executedKeys), stateData) => + handleCallAborted(stateData, jobKey, executedKeys) + case Event(SubWorkflowSucceededResponse(subKey, executedKeys, _), stateData) => + handleCallAborted(stateData, subKey, executedKeys) + case 
Event(JobSucceededResponse(jobKey, returnCode, callOutputs, _, _), stateData) => + handleCallAborted(stateData, jobKey, Map.empty) + } + when(WorkflowExecutionSuccessfulState) { FSM.NullFunction } @@ -359,51 +151,63 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, */ private def alreadyFailedMopUp: StateFunction = { case Event(JobInitializationFailed(jobKey, reason), stateData) => - pushFailedJobMetadata(jobKey, None, reason, retryableFailure = false) + pushFailedCallMetadata(jobKey, None, reason, retryableFailure = false) stay - case Event(FailedNonRetryableResponse(jobKey, reason, returnCode), stateData) => - pushFailedJobMetadata(jobKey, returnCode, reason, retryableFailure = false) + case Event(JobFailedNonRetryableResponse(jobKey, reason, returnCode), stateData) => + pushFailedCallMetadata(jobKey, returnCode, reason, retryableFailure = false) stay - case Event(FailedRetryableResponse(jobKey, reason, returnCode), stateData) => - pushFailedJobMetadata(jobKey, returnCode, reason, retryableFailure = true) + case Event(JobFailedRetryableResponse(jobKey, reason, returnCode), stateData) => + pushFailedCallMetadata(jobKey, returnCode, reason, retryableFailure = true) stay - case Event(SucceededResponse(jobKey, returnCode, callOutputs, _, _), stateData) => - pushSuccessfulJobMetadata(jobKey, returnCode, callOutputs) + case Event(JobSucceededResponse(jobKey, returnCode, callOutputs, _, _), stateData) => + pushSuccessfulCallMetadata(jobKey, returnCode, callOutputs) stay } - when(WorkflowExecutionAbortingState) { - case Event(AbortedResponse(jobKey), stateData) => - workflowLogger.info(s"$tag job aborted: ${jobKey.tag}") - val newStateData = stateData.removeBackendJobExecutionActor(jobKey) - if (newStateData.backendJobExecutionActors.isEmpty) { - workflowLogger.info(s"$tag all jobs aborted") - goto(WorkflowExecutionAbortedState) - } else { - stay() using newStateData + + def handleTerminated(actorRef: ActorRef) = { + // Both of these Should Never Happen 
(tm), assuming the state data is set correctly on EJEA creation. + // If they do, it's a big programmer error and the workflow execution fails. + val jobKey = stateData.engineCallExecutionActors.getOrElse(actorRef, throw new RuntimeException("Programmer Error: An EJEA has terminated but was not assigned a jobKey")) + val jobStatus = stateData.executionStore.store.getOrElse(jobKey, throw new RuntimeException("Programmer Error: An EJEA representing a jobKey which this workflow is not running has sent up a terminated message.")) + + if (!jobStatus.isTerminal) { + val terminationException = getFailureCause(actorRef) match { + case Some(e) => new RuntimeException("Unexpected failure in EJEA.", e) + case None => new RuntimeException("Unexpected failure in EJEA (root cause not captured).") } + self ! JobFailedNonRetryableResponse(jobKey, terminationException, None) + } + + stay } whenUnhandled { + case Event(Terminated(actorRef), stateData) => handleTerminated(actorRef) using stateData.removeEngineJobExecutionActor(actorRef) case Event(MetadataPutFailed(action, error), _) => // Do something useful here?? - workflowLogger.warn(s"$tag Put failed for Metadata action $action : ${error.getMessage}") - stay + workflowLogger.warn(s"$tag Put failed for Metadata action $action", error) + stay() case Event(MetadataPutAcknowledgement(_), _) => stay() case Event(EngineLifecycleActorAbortCommand, stateData) => - if (stateData.backendJobExecutionActors.nonEmpty) { - log.info(s"$tag: Abort received. Aborting ${stateData.backendJobExecutionActors.size} EJEAs") - stateData.backendJobExecutionActors.values foreach {_ ! AbortJobCommand} + if (stateData.hasRunningActors) { + log.info(s"$tag: Abort received. " + + s"Aborting ${stateData.backendJobExecutionActors.size} Job Execution Actors" + + s"and ${stateData.subWorkflowExecutionActors.size} Sub Workflow Execution Actors" + ) + stateData.backendJobExecutionActors.values foreach { _ ! 
AbortJobCommand } + stateData.subWorkflowExecutionActors.values foreach { _ ! EngineLifecycleActorAbortCommand } goto(WorkflowExecutionAbortingState) } else { goto(WorkflowExecutionAbortedState) } case Event(EngineStatsActor.JobCountQuery, data) => sender ! EngineStatsActor.JobCount(data.backendJobExecutionActors.size) + data.subWorkflowExecutionActors.values foreach { _ forward EngineStatsActor.JobCountQuery } stay() case unhandledMessage => workflowLogger.warn(s"$tag received an unhandled message: ${unhandledMessage.event} in state: $stateName") - stay + stay() } onTransition { @@ -415,11 +219,61 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, } onTransition { - case _ -> WorkflowExecutionSuccessfulState => - pushWorkflowOutputMetadata(nextStateData) - context.parent ! WorkflowExecutionSucceededResponse(nextStateData.executionStore, nextStateData.outputStore) case _ -> WorkflowExecutionAbortedState => - context.parent ! WorkflowExecutionAbortedResponse(nextStateData.executionStore, nextStateData.outputStore) + context.parent ! 
WorkflowExecutionAbortedResponse(nextStateData.jobExecutionMap) + } + + private def handleNonRetryableFailure(stateData: WorkflowExecutionActorData, failedJobKey: JobKey, reason: Throwable, jobExecutionMap: JobExecutionMap) = { + val newData = stateData + .removeCallExecutionActor(failedJobKey) + .addExecutions(jobExecutionMap) + + handleExecutionFailure(failedJobKey, newData, reason, jobExecutionMap) + } + + private def handleDeclarationEvaluationFailure(declarationKey: DeclarationKey, reason: Throwable, stateData: WorkflowExecutionActorData) = { + handleExecutionFailure(declarationKey, stateData, reason, Map.empty) + } + + private def handleExecutionFailure(failedJobKey: JobKey, data: WorkflowExecutionActorData, reason: Throwable, jobExecutionMap: JobExecutionMap) = { + val newData = data.executionFailed(failedJobKey) + + if (workflowDescriptor.getWorkflowOption(WorkflowFailureMode).contains(ContinueWhilePossible.toString)) { + newData.workflowCompletionStatus match { + case Some(completionStatus) if completionStatus == Failed => + context.parent ! WorkflowExecutionFailedResponse(newData.jobExecutionMap, reason) + goto(WorkflowExecutionFailedState) using newData + case _ => + stay() using startRunnableScopes(newData) + } + } else { + context.parent ! WorkflowExecutionFailedResponse(newData.jobExecutionMap, reason) + goto(WorkflowExecutionFailedState) using newData + } + } + + private def handleWorkflowSuccessful(data: WorkflowExecutionActorData) = { + import cromwell.util.JsonFormatting.WdlValueJsonFormatter._ + import spray.json._ + + val (response, finalState) = workflowDescriptor.workflow.evaluateOutputs( + workflowDescriptor.inputs, + data.expressionLanguageFunctions, + data.outputStore.fetchNodeOutputEntries + ) map { workflowOutputs => + workflowLogger.info( + s"""Workflow ${workflowDescriptor.workflow.unqualifiedName} complete. 
Final Outputs: + |${workflowOutputs.toJson.prettyPrint}""".stripMargin + ) + pushWorkflowOutputMetadata(workflowOutputs) + (WorkflowExecutionSucceededResponse(data.jobExecutionMap, workflowOutputs mapValues JobOutput.apply), WorkflowExecutionSuccessfulState) + } recover { + case ex => + (WorkflowExecutionFailedResponse(data.jobExecutionMap, ex), WorkflowExecutionFailedState) + } get + + context.parent ! response + goto(finalState) using data } private def handleRetryableFailure(jobKey: BackendJobDescriptorKey, reason: Throwable, returnCode: Option[Int]) = { @@ -427,103 +281,51 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, if (jobKey.attempt <= MaxRetries) { val newJobKey = jobKey.copy(attempt = jobKey.attempt + 1) workflowLogger.info(s"Retrying job execution for ${newJobKey.tag}") - /** Currently, we update the status of the old key to Preempted, and add a new entry (with the #attempts incremented by 1) + /* Currently, we update the status of the old key to Preempted, and add a new entry (with the #attempts incremented by 1) * to the execution store with status as NotStarted. This allows startRunnableCalls to re-execute this job */ val executionDiff = WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Preempted, newJobKey -> ExecutionStatus.NotStarted)) - val newData = stateData.mergeExecutionDiff(executionDiff) + val newData = stateData.mergeExecutionDiff(executionDiff).removeCallExecutionActor(jobKey) stay() using startRunnableScopes(newData) } else { workflowLogger.warn(s"Exhausted maximum number of retries for job ${jobKey.tag}. 
Failing.") - goto(WorkflowExecutionFailedState) using stateData.mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Failed))) - } - } - - private def handleJobSuccessful(jobKey: JobKey, outputs: JobOutputs, data: WorkflowExecutionActorData) = { - workflowLogger.debug(s"Job ${jobKey.tag} succeeded!") - val newData = data.jobExecutionSuccess(jobKey, outputs) - - newData.workflowCompletionStatus match { - case Some(ExecutionStatus.Done) => - workflowLogger.info(newData.outputsJson()) - goto(WorkflowExecutionSuccessfulState) using newData - case Some(sts) => - context.parent ! WorkflowExecutionFailedResponse(stateData.executionStore, stateData.outputStore, List(new Exception("One or more jobs failed in fail-slow mode"))) - goto(WorkflowExecutionFailedState) using newData - case _ => - stay() using startRunnableScopes(newData) + goto(WorkflowExecutionFailedState) using stateData.mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Failed))).removeCallExecutionActor(jobKey) } } - private def pushWorkflowOutputMetadata(data: WorkflowExecutionActorData) = { - val reportableOutputs = workflowDescriptor.backendDescriptor.workflowNamespace.workflow.outputs - val keyValues = data.outputStore.store filterKeys { - _.index.isEmpty - } flatMap { - case (key, value) => - value collect { - case entry if isReportableOutput(key.call, entry, reportableOutputs) => - s"${key.call.fullyQualifiedName}.${entry.name}" -> entry.wdlValue - } - } collect { - case (key, Some(wdlValue)) => (key, wdlValue) - } - - val events = keyValues match { - case empty if empty.isEmpty => List(MetadataEvent.empty(MetadataKey(workflowId, None, WorkflowMetadataKeys.Outputs))) - case _ => keyValues flatMap { - case (outputName, outputValue) => - wdlValueToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Outputs}:$outputName"), outputValue) - } - } - - serviceRegistryActor ! 
PutMetadataAction(events) + private def handleCallSuccessful(jobKey: JobKey, outputs: CallOutputs, data: WorkflowExecutionActorData, jobExecutionMap: JobExecutionMap) = { + handleExecutionSuccess(data.callExecutionSuccess(jobKey, outputs).addExecutions(jobExecutionMap)) } - - private def isReportableOutput(scope: Scope, entry: OutputEntry, - reportableOutputs: Seq[ReportableSymbol]): Boolean = { - reportableOutputs exists { reportableOutput => - reportableOutput.fullyQualifiedName == s"${scope.fullyQualifiedName}.${entry.name}" - } + + private def handleDeclarationEvaluationSuccessful(key: DeclarationKey, value: WdlValue, data: WorkflowExecutionActorData) = { + handleExecutionSuccess(data.declarationEvaluationSuccess(key, value)) } - - private def pushSuccessfulJobMetadata(jobKey: JobKey, returnCode: Option[Int], outputs: JobOutputs) = { - val completionEvents = completedJobMetadataEvents(jobKey, ExecutionStatus.Done, returnCode) - - val outputEvents = outputs match { - case empty if empty.isEmpty => - List(MetadataEvent.empty(metadataKey(jobKey, s"${CallMetadataKeys.Outputs}"))) + + private def handleExecutionSuccess(data: WorkflowExecutionActorData) = { + data.workflowCompletionStatus match { + case Some(ExecutionStatus.Done) => + handleWorkflowSuccessful(data) + case Some(sts) => + context.parent ! WorkflowExecutionFailedResponse(data.jobExecutionMap, new Exception("One or more jobs failed in fail-slow mode")) + goto(WorkflowExecutionFailedState) using data case _ => - outputs flatMap { case (lqn, value) => wdlValueToMetadataEvents(metadataKey(jobKey, s"${CallMetadataKeys.Outputs}:$lqn"), value.wdlValue) } + stay() using startRunnableScopes(data) } - - serviceRegistryActor ! 
PutMetadataAction(completionEvents ++ outputEvents) - } - - private def pushFailedJobMetadata(jobKey: JobKey, returnCode: Option[Int], failure: Throwable, retryableFailure: Boolean) = { - val failedState = if (retryableFailure) ExecutionStatus.Preempted else ExecutionStatus.Failed - val completionEvents = completedJobMetadataEvents(jobKey, failedState, returnCode) - val retryableFailureEvent = MetadataEvent(metadataKey(jobKey, CallMetadataKeys.RetryableFailure), MetadataValue(retryableFailure)) - val failureEvents = throwableToMetadataEvents(metadataKey(jobKey, s"${CallMetadataKeys.Failures}[$randomNumberString]"), failure).+:(retryableFailureEvent) - - serviceRegistryActor ! PutMetadataAction(completionEvents ++ failureEvents) } - - private def randomNumberString: String = Random.nextInt.toString.stripPrefix("-") - - private def completedJobMetadataEvents(jobKey: JobKey, executionStatus: ExecutionStatus, returnCode: Option[Int]) = { - val returnCodeEvent = returnCode map { rc => - List(MetadataEvent(metadataKey(jobKey, CallMetadataKeys.ReturnCode), MetadataValue(rc))) + + private def handleCallAborted(data: WorkflowExecutionActorData, jobKey: JobKey, jobExecutionMap: JobExecutionMap) = { + workflowLogger.info(s"$tag job aborted: ${jobKey.tag}") + val newStateData = data.removeCallExecutionActor(jobKey).addExecutions(jobExecutionMap) + if (!newStateData.hasRunningActors) { + workflowLogger.info(s"$tag all jobs aborted") + goto(WorkflowExecutionAbortedState) + } else { + stay() using newStateData } - - List( - MetadataEvent(metadataKey(jobKey, CallMetadataKeys.ExecutionStatus), MetadataValue(executionStatus)), - MetadataEvent(metadataKey(jobKey, CallMetadataKeys.End), MetadataValue(OffsetDateTime.now)) - ) ++ returnCodeEvent.getOrElse(List.empty) } /** * Attempt to start all runnable jobs and return updated state data. This will create a new copy - * of the state data including new pending persists. + * of the state data. 
*/ @tailrec private def startRunnableScopes(data: WorkflowExecutionActorData): WorkflowExecutionActorData = { @@ -537,6 +339,9 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, case k: BackendJobDescriptorKey => processRunnableJob(k, data) case k: ScatterKey => processRunnableScatter(k, data) case k: CollectorKey => processRunnableCollector(k, data) + case k: SubWorkflowKey => processRunnableSubWorkflow(k, data) + case k: StaticDeclarationKey => processRunnableStaticDeclaration(k) + case k: DynamicDeclarationKey => processRunnableDynamicDeclaration(k, data) case k => val exception = new UnsupportedOperationException(s"Unknown entry in execution store: ${k.tag}") self ! JobInitializationFailed(k, exception) @@ -544,35 +349,42 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, } TryUtil.sequence(executionDiffs) match { - case Success(diffs) if diffs.exists(_.containsNewEntry) => startRunnableScopes(data.mergeExecutionDiffs(diffs)) - case Success(diffs) => data.mergeExecutionDiffs(diffs) - case Failure(e) => data + case Success(diffs) => + // Update the metadata for the jobs we just sent to EJEAs (they'll start off queued up waiting for tokens): + pushQueuedCallMetadata(diffs) + if (diffs.exists(_.containsNewEntry)) { + startRunnableScopes(data.mergeExecutionDiffs(diffs)) + } else { + data.mergeExecutionDiffs(diffs) + } + case Failure(e) => throw new RuntimeException("Unexpected engine failure", e) } } - private def pushNewJobMetadata(jobKey: BackendJobDescriptorKey, backendName: String) = { - val startEvents = List( - MetadataEvent(metadataKey(jobKey, CallMetadataKeys.Start), MetadataValue(OffsetDateTime.now)), - MetadataEvent(metadataKey(jobKey, CallMetadataKeys.Backend), MetadataValue(backendName)) - ) - - serviceRegistryActor ! PutMetadataAction(startEvents) + def processRunnableStaticDeclaration(declaration: StaticDeclarationKey) = { + self ! 
DeclarationEvaluationSucceededResponse(declaration, declaration.value) + Success(WorkflowExecutionDiff(Map(declaration -> ExecutionStatus.Running))) } + + def processRunnableDynamicDeclaration(declaration: DynamicDeclarationKey, data: WorkflowExecutionActorData) = { + val scatterMap = declaration.index flatMap { i => + // Will need update for nested scatters + declaration.scope.ancestry collectFirst { case s: Scatter => Map(s -> i) } + } getOrElse Map.empty[Scatter, Int] - private def pushRunningJobMetadata(jobDescriptor: BackendJobDescriptor) = { - val inputEvents = jobDescriptor.inputs match { - case empty if empty.isEmpty => - List(MetadataEvent.empty(metadataKey(jobDescriptor.key, s"${CallMetadataKeys.Inputs}"))) - case inputs => - inputs flatMap { - case (inputName, inputValue) => - wdlValueToMetadataEvents(metadataKey(jobDescriptor.key, s"${CallMetadataKeys.Inputs}:$inputName"), inputValue) - } + val lookup = declaration.scope.lookupFunction( + workflowDescriptor.workflowInputs, + data.expressionLanguageFunctions, + data.outputStore.fetchNodeOutputEntries, + scatterMap + ) + + declaration.requiredExpression.evaluate(lookup, data.expressionLanguageFunctions) match { + case Success(result) => self ! DeclarationEvaluationSucceededResponse(declaration, result) + case Failure(ex) => self ! DeclarationEvaluationFailedResponse(declaration, ex) } - val runningEvent = List(MetadataEvent(metadataKey(jobDescriptor.key, CallMetadataKeys.ExecutionStatus), MetadataValue(ExecutionStatus.Running))) - - serviceRegistryActor ! 
PutMetadataAction(runningEvent ++ inputEvents) + Success(WorkflowExecutionDiff(Map(declaration -> ExecutionStatus.Running))) } private def processRunnableJob(jobKey: BackendJobDescriptorKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { @@ -583,7 +395,7 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, workflowLogger.error(exception, s"$tag $message") throw exception case Some(backendName) => - factories.get(backendName) match { + backendFactories.get(backendName) match { case Some(factory) => val ejeaName = s"${workflowDescriptor.id}-EngineJobExecutionActor-${jobKey.tag}" val backendSingleton = backendSingletonCollection.backendSingletonActors(backendName) @@ -591,32 +403,224 @@ final case class WorkflowExecutionActor(workflowId: WorkflowId, self, jobKey, data, factory, initializationData.get(backendName), restarting, serviceRegistryActor, jobStoreActor, callCacheReadActor, jobTokenDispenserActor, backendSingleton, backendName, workflowDescriptor.callCachingMode) val ejeaRef = context.actorOf(ejeaProps, ejeaName) - pushNewJobMetadata(jobKey, backendName) + context watch ejeaRef + pushNewCallMetadata(jobKey, Option(backendName)) ejeaRef ! 
EngineJobExecutionActor.Execute - Success(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Starting))) + Success(WorkflowExecutionDiff( + executionStoreChanges = Map(jobKey -> ExecutionStatus.QueuedInCromwell), + engineJobExecutionActorAdditions = Map(ejeaRef -> jobKey))) case None => throw WorkflowExecutionException(NonEmptyList.of(new Exception(s"Could not get BackendLifecycleActor for backend $backendName"))) } } } + + private def processRunnableSubWorkflow(key: SubWorkflowKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { + val sweaRef = context.actorOf( + SubWorkflowExecutionActor.props(key, data, backendFactories, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, + callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection, initializationData, restarting), + s"SubWorkflowExecutionActor-${key.tag}" + ) + + context watch sweaRef + pushNewCallMetadata(key, None) + sweaRef ! SubWorkflowExecutionActor.Execute + + Success(WorkflowExecutionDiff(executionStoreChanges = Map(key -> ExecutionStatus.QueuedInCromwell), + engineJobExecutionActorAdditions = Map(sweaRef -> key))) + } private def processRunnableScatter(scatterKey: ScatterKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { - val lookup = data.hierarchicalLookup(scatterKey.scope, None) _ + val lookup = scatterKey.scope.lookupFunction( + workflowDescriptor.workflowInputs, + data.expressionLanguageFunctions, + data.outputStore.fetchNodeOutputEntries + ) scatterKey.scope.collection.evaluate(lookup, data.expressionLanguageFunctions) map { - case a: WdlArray => WorkflowExecutionDiff(scatterKey.populate(a.value.size) + (scatterKey -> ExecutionStatus.Done)) + case a: WdlArray => WorkflowExecutionDiff(scatterKey.populate(a.value.size, workflowDescriptor.inputs) + (scatterKey -> ExecutionStatus.Done)) case v: WdlValue => throw new RuntimeException("Scatter collection must evaluate to an array") } } private def processRunnableCollector(collector: 
CollectorKey, data: WorkflowExecutionActorData): Try[WorkflowExecutionDiff] = { - val shards = data.executionStore.findShardEntries(collector) collect { case (k: BackendJobDescriptorKey, v) if v == ExecutionStatus.Done => k } + val shards = data.executionStore.findShardEntries(collector) collect { + case (k: CallKey, v) if v == ExecutionStatus.Done => k + case (k: DynamicDeclarationKey, v) if v == ExecutionStatus.Done => k + } data.outputStore.generateCollectorOutput(collector, shards) match { case Failure(e) => Failure(new RuntimeException(s"Failed to collect output shards for call ${collector.tag}")) case Success(outputs) => self ! ScatterCollectionSucceededResponse(collector, outputs) Success(WorkflowExecutionDiff(Map(collector -> ExecutionStatus.Starting))) } } - - private def metadataKey(jobKey: JobKey, myKey: String) = MetadataKey(workflowDescriptor.id, Option(MetadataJobKey(jobKey.scope.fullyQualifiedName, jobKey.index, jobKey.attempt)), myKey) } + +object WorkflowExecutionActor { + + /** + * States + */ + sealed trait WorkflowExecutionActorState { + def terminal = false + } + + sealed trait WorkflowExecutionActorTerminalState extends WorkflowExecutionActorState { + override val terminal = true + } + + case object WorkflowExecutionPendingState extends WorkflowExecutionActorState + + case object WorkflowExecutionInProgressState extends WorkflowExecutionActorState + + case object WorkflowExecutionAbortingState extends WorkflowExecutionActorState + + case object WorkflowExecutionSuccessfulState extends WorkflowExecutionActorTerminalState + + case object WorkflowExecutionFailedState extends WorkflowExecutionActorTerminalState + + case object WorkflowExecutionAbortedState extends WorkflowExecutionActorTerminalState + + /** + * Commands + */ + sealed trait WorkflowExecutionActorCommand + + case object ExecuteWorkflowCommand extends WorkflowExecutionActorCommand + + /** + * Responses + */ + sealed trait WorkflowExecutionActorResponse { + def jobExecutionMap: 
JobExecutionMap + } + + case class WorkflowExecutionSucceededResponse(jobExecutionMap: JobExecutionMap, outputs: CallOutputs) + extends WorkflowExecutionActorResponse { + override def toString = "WorkflowExecutionSucceededResponse" + } + + case class WorkflowExecutionAbortedResponse(jobExecutionMap: JobExecutionMap) + extends WorkflowExecutionActorResponse with EngineLifecycleActorAbortedResponse { + override def toString = "WorkflowExecutionAbortedResponse" + } + + final case class WorkflowExecutionFailedResponse(jobExecutionMap: JobExecutionMap, reason: Throwable) extends WorkflowExecutionActorResponse { + override def toString = "WorkflowExecutionFailedResponse" + } + + /** + * Internal control flow messages + */ + private case class JobInitializationFailed(jobKey: JobKey, throwable: Throwable) + + private case class ScatterCollectionFailedResponse(collectorKey: CollectorKey, throwable: Throwable) + + private case class ScatterCollectionSucceededResponse(collectorKey: CollectorKey, outputs: CallOutputs) + + private case class DeclarationEvaluationSucceededResponse(declarationKey: DeclarationKey, value: WdlValue) + + private case class DeclarationEvaluationFailedResponse(declarationKey: DeclarationKey, reason: Throwable) + + case class SubWorkflowSucceededResponse(key: SubWorkflowKey, jobExecutionMap: JobExecutionMap, outputs: CallOutputs) + + case class SubWorkflowFailedResponse(key: SubWorkflowKey, jobExecutionMap: JobExecutionMap, reason: Throwable) + + case class SubWorkflowAbortedResponse(key: SubWorkflowKey, jobExecutionMap: JobExecutionMap) + + /** + * Internal ADTs + */ + case class ScatterKey(scope: Scatter) extends JobKey { + override val index = None + // When scatters are nested, this might become Some(_) + override val attempt = 1 + override val tag = scope.unqualifiedName + + /** + * Creates a sub-ExecutionStore with Starting entries for each of the scoped children. + * + * @param count Number of ways to scatter the children. 
+ * @return ExecutionStore of scattered children. + */ + def populate(count: Int, workflowCoercedInputs: WorkflowCoercedInputs): Map[JobKey, ExecutionStatus.Value] = { + val keys = this.scope.children flatMap { + explode(_, count, workflowCoercedInputs) + } + keys map { + _ -> ExecutionStatus.NotStarted + } toMap + } + + private def explode(scope: Scope, count: Int, workflowCoercedInputs: WorkflowCoercedInputs): Seq[JobKey] = { + scope match { + case call: TaskCall => + val shards = (0 until count) map { i => BackendJobDescriptorKey(call, Option(i), 1) } + shards :+ CollectorKey(call) + case call: WorkflowCall => + val shards = (0 until count) map { i => SubWorkflowKey(call, Option(i), 1) } + shards :+ CollectorKey(call) + case declaration: Declaration => + val shards = (0 until count) map { i => DeclarationKey(declaration, Option(i), workflowCoercedInputs) } + shards :+ CollectorKey(declaration) + case scatter: Scatter => + throw new UnsupportedOperationException("Nested Scatters are not supported (yet) ... 
but you might try a sub workflow to achieve the same effect!") + case e => + throw new UnsupportedOperationException(s"Scope ${e.getClass.getName} is not supported.") + } + } + } + + // Represents a scatter collection for a call in the execution store + case class CollectorKey(scope: Scope with GraphNode) extends JobKey { + override val index = None + override val attempt = 1 + override val tag = s"Collector-${scope.unqualifiedName}" + } + + case class SubWorkflowKey(scope: WorkflowCall, index: ExecutionIndex, attempt: Int) extends CallKey { + override val tag = s"SubWorkflow-${scope.unqualifiedName}:${index.fromIndex}:$attempt" + } + + object DeclarationKey { + def apply(declaration: Declaration, index: ExecutionIndex, inputs: WorkflowCoercedInputs): DeclarationKey = { + inputs.find(_._1 == declaration.fullyQualifiedName) match { + case Some((_, value)) => StaticDeclarationKey(declaration, index, value) + case None => declaration.expression map { expression => + DynamicDeclarationKey(declaration, index, expression) + } getOrElse { + throw new RuntimeException(s"Found a declaration ${declaration.fullyQualifiedName} without expression and without input value. 
This should have been a validation error.") + } + } + } + } + + sealed trait DeclarationKey extends JobKey { + override val attempt = 1 + override val tag = s"Declaration-${scope.unqualifiedName}:${index.fromIndex}:$attempt" + } + + case class StaticDeclarationKey(scope: Declaration, index: ExecutionIndex, value: WdlValue) extends DeclarationKey + + case class DynamicDeclarationKey(scope: Declaration, index: ExecutionIndex, requiredExpression: WdlExpression) extends DeclarationKey + + case class WorkflowExecutionException[T <: Throwable](exceptions: NonEmptyList[T]) extends ThrowableAggregation { + override val throwables = exceptions.toList + override val exceptionContext = s"WorkflowExecutionActor" + } + + private lazy val DefaultMaxRetriesFallbackValue = 10 + + def props(workflowDescriptor: EngineWorkflowDescriptor, + serviceRegistryActor: ActorRef, + jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, + callCacheReadActor: ActorRef, + jobTokenDispenserActor: ActorRef, + backendSingletonCollection: BackendSingletonCollection, + initializationData: AllBackendInitializationData, + restarting: Boolean): Props = { + Props(WorkflowExecutionActor(workflowDescriptor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, + callCacheReadActor, jobTokenDispenserActor, backendSingletonCollection, initializationData, restarting)).withDispatcher(EngineDispatcher) + } +} \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorData.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorData.scala index 599c8f1b4..4bf2f213c 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorData.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorData.scala @@ -1,55 +1,100 @@ package cromwell.engine.workflow.lifecycle.execution import akka.actor.ActorRef +import 
cromwell.backend._ import cromwell.core.ExecutionStatus._ -import cromwell.core.OutputStore.{OutputCallKey, OutputEntry} import cromwell.core._ +import cromwell.engine.workflow.lifecycle.execution.OutputStore.{OutputCallKey, OutputEntry} +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{DeclarationKey, SubWorkflowKey} import cromwell.engine.{EngineWorkflowDescriptor, WdlFunctions} import cromwell.util.JsonFormatting.WdlValueJsonFormatter -import wdl4s.Scope - +import wdl4s.values.WdlValue +import wdl4s.{GraphNode, Scope} object WorkflowExecutionDiff { def empty = WorkflowExecutionDiff(Map.empty) } /** Data differential between current execution data, and updates performed in a method that needs to be merged. */ -final case class WorkflowExecutionDiff(executionStore: Map[JobKey, ExecutionStatus]) { - def containsNewEntry = executionStore.exists(_._2 == NotStarted) +final case class WorkflowExecutionDiff(executionStoreChanges: Map[JobKey, ExecutionStatus], + engineJobExecutionActorAdditions: Map[ActorRef, JobKey] = Map.empty) { + def containsNewEntry = executionStoreChanges.exists(_._2 == NotStarted) +} + +object WorkflowExecutionActorData { + def empty(workflowDescriptor: EngineWorkflowDescriptor) = { + new WorkflowExecutionActorData( + workflowDescriptor, + ExecutionStore.empty, + Map.empty, + Map.empty, + Map.empty, + Map.empty, + OutputStore.empty + ) + } } case class WorkflowExecutionActorData(workflowDescriptor: EngineWorkflowDescriptor, executionStore: ExecutionStore, backendJobExecutionActors: Map[JobKey, ActorRef], - outputStore: OutputStore) extends WdlLookup { + engineCallExecutionActors: Map[ActorRef, JobKey], + subWorkflowExecutionActors: Map[SubWorkflowKey, ActorRef], + downstreamExecutionMap: JobExecutionMap, + outputStore: OutputStore) { + + val expressionLanguageFunctions = new WdlFunctions(workflowDescriptor.pathBuilders) + + def callExecutionSuccess(jobKey: JobKey, outputs: CallOutputs) = { + val (newJobExecutionActors, 
newSubWorkflowExecutionActors) = jobKey match { + case jobKey: BackendJobDescriptorKey => (backendJobExecutionActors - jobKey, subWorkflowExecutionActors) + case swKey: SubWorkflowKey => (backendJobExecutionActors, subWorkflowExecutionActors - swKey) + case _ => (backendJobExecutionActors, subWorkflowExecutionActors) + } - override val expressionLanguageFunctions = new WdlFunctions(workflowDescriptor.engineFilesystems) + this.copy( + executionStore = executionStore.add(Map(jobKey -> Done)), + backendJobExecutionActors = newJobExecutionActors, + subWorkflowExecutionActors = newSubWorkflowExecutionActors, + outputStore = outputStore.add(updateSymbolStoreEntry(jobKey, outputs)) + ) + } + + def declarationEvaluationSuccess(declarationKey: DeclarationKey, value: WdlValue) = { + val outputStoreKey = OutputCallKey(declarationKey.scope, declarationKey.index) + val outputStoreValue = OutputEntry(declarationKey.scope.unqualifiedName, value.wdlType, Option(value)) + this.copy( + executionStore = executionStore.add(Map(declarationKey -> Done)), + outputStore = outputStore.add(Map(outputStoreKey -> List(outputStoreValue))) + ) + } - def jobExecutionSuccess(jobKey: JobKey, outputs: JobOutputs) = this.copy( - executionStore = executionStore.add(Map(jobKey -> Done)), - backendJobExecutionActors = backendJobExecutionActors - jobKey, - outputStore = outputStore.add(updateSymbolStoreEntry(jobKey, outputs)) - ) + def executionFailed(jobKey: JobKey) = mergeExecutionDiff(WorkflowExecutionDiff(Map(jobKey -> ExecutionStatus.Failed))) /** Add the outputs for the specified `JobKey` to the symbol cache. 
*/ - private def updateSymbolStoreEntry(jobKey: JobKey, outputs: JobOutputs) = { + private def updateSymbolStoreEntry(jobKey: JobKey, outputs: CallOutputs) = { val newOutputEntries = outputs map { case (name, value) => OutputEntry(name, value.wdlValue.wdlType, Option(value.wdlValue)) } - Map(OutputCallKey(jobKey.scope, jobKey.index) -> newOutputEntries) + Map(OutputCallKey(jobKey.scope, jobKey.index) -> newOutputEntries.toList) } /** Checks if the workflow is completed by scanning through the executionStore. * If complete, this will return Some(finalStatus). Otherwise, returns None */ def workflowCompletionStatus: Option[ExecutionStatus] = { // `List`ify the `prerequisiteScopes` to avoid expensive hashing of `Scope`s when assembling the result. - def upstream(scope: Scope): List[Scope] = scope.prerequisiteScopes.toList ++ scope.prerequisiteScopes.toList.flatMap(upstream) - def upstreamFailed(scope: Scope) = upstream(scope) filter { s => - executionStore.store.map({ case (a, b) => a.scope -> b }).get(s).contains(Failed) + def upstream(scope: GraphNode): List[Scope] = { + val directUpstream: List[Scope with GraphNode] = scope.upstream.toList + directUpstream ++ directUpstream.flatMap(upstream) + } + def upstreamFailed(scope: Scope) = scope match { + case node: GraphNode => upstream(node) filter { s => + executionStore.store.exists({ case (key, status) => status == Failed && key.scope == s }) + } } // activeJobs is the subset of the executionStore that are either running or will run in the future. 
val activeJobs = executionStore.store.toList filter { - case (jobKey, jobStatus) => (jobStatus == NotStarted && upstreamFailed(jobKey.scope).isEmpty) || jobStatus == Starting || jobStatus == Running + case (jobKey, jobStatus) => (jobStatus == NotStarted && upstreamFailed(jobKey.scope).isEmpty) || jobStatus == QueuedInCromwell || jobStatus == Starting || jobStatus == Running } activeJobs match { @@ -63,13 +108,30 @@ case class WorkflowExecutionActorData(workflowDescriptor: EngineWorkflowDescript executionStore.store.values.exists(_ == ExecutionStatus.Failed) } - def addBackendJobExecutionActor(jobKey: JobKey, actor: Option[ActorRef]): WorkflowExecutionActorData = actor match { - case Some(actorRef) => this.copy(backendJobExecutionActors = backendJobExecutionActors + (jobKey -> actorRef)) + def removeEngineJobExecutionActor(actorRef: ActorRef) = { + this.copy(engineCallExecutionActors = engineCallExecutionActors - actorRef) + } + + def addCallExecutionActor(jobKey: JobKey, actor: Option[ActorRef]): WorkflowExecutionActorData = actor match { + case Some(actorRef) => + jobKey match { + case jobKey: BackendJobDescriptorKey => this.copy(backendJobExecutionActors = backendJobExecutionActors + (jobKey -> actorRef)) + case swKey: SubWorkflowKey => this.copy(subWorkflowExecutionActors = subWorkflowExecutionActors + (swKey -> actorRef)) + case _ => this + } case None => this } - def removeBackendJobExecutionActor(jobKey: JobKey): WorkflowExecutionActorData = { - this.copy(backendJobExecutionActors = backendJobExecutionActors - jobKey) + def removeCallExecutionActor(jobKey: JobKey): WorkflowExecutionActorData = { + jobKey match { + case jobKey: BackendJobDescriptorKey => this.copy(backendJobExecutionActors = backendJobExecutionActors - jobKey) + case swKey: SubWorkflowKey => this.copy(subWorkflowExecutionActors = subWorkflowExecutionActors - swKey) + case _ => this + } + } + + def addExecutions(jobExecutionMap: JobExecutionMap): WorkflowExecutionActorData = { + 
this.copy(downstreamExecutionMap = downstreamExecutionMap ++ jobExecutionMap) } def outputsJson(): String = { @@ -86,11 +148,19 @@ case class WorkflowExecutionActorData(workflowDescriptor: EngineWorkflowDescript } def mergeExecutionDiff(diff: WorkflowExecutionDiff): WorkflowExecutionActorData = { - this.copy(executionStore = executionStore.add(diff.executionStore)) + this.copy( + executionStore = executionStore.add(diff.executionStoreChanges), + engineCallExecutionActors = engineCallExecutionActors ++ diff.engineJobExecutionActorAdditions) } def mergeExecutionDiffs(diffs: Traversable[WorkflowExecutionDiff]): WorkflowExecutionActorData = { diffs.foldLeft(this)((newData, diff) => newData.mergeExecutionDiff(diff)) } - + + def jobExecutionMap: JobExecutionMap = { + val keys = executionStore.store.collect({case (k: BackendJobDescriptorKey, status) if status != ExecutionStatus.NotStarted => k }).toList + downstreamExecutionMap updated (workflowDescriptor.backendDescriptor, keys) + } + + def hasRunningActors = backendJobExecutionActors.nonEmpty || subWorkflowExecutionActors.nonEmpty } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala new file mode 100644 index 000000000..d569f1fae --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowMetadataHelper.scala @@ -0,0 +1,37 @@ +package cromwell.engine.workflow.lifecycle.execution + +import java.time.OffsetDateTime + +import akka.actor.ActorRef +import cromwell.core.{WorkflowId, WorkflowMetadataKeys, WorkflowState} +import cromwell.services.metadata.MetadataService._ +import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} + +import scala.util.Random + +trait WorkflowMetadataHelper { + + def serviceRegistryActor: ActorRef + + def pushWorkflowStart(workflowId: WorkflowId) = { + val startEvent = 
MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.StartTime), MetadataValue(OffsetDateTime.now.toString)) + serviceRegistryActor ! PutMetadataAction(startEvent) + } + + def pushWorkflowEnd(workflowId: WorkflowId) = { + val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.EndTime), MetadataValue(OffsetDateTime.now.toString)) + serviceRegistryActor ! PutMetadataAction(metadataEventMsg) + } + + def pushWorkflowFailures(workflowId: WorkflowId, failures: List[Throwable]) = { + val failureEvents = failures flatMap { r => throwableToMetadataEvents(MetadataKey(workflowId, None, s"${WorkflowMetadataKeys.Failures}[${Random.nextInt(Int.MaxValue)}]"), r) } + serviceRegistryActor ! PutMetadataAction(failureEvents) + } + + def pushCurrentStateToMetadataService(workflowId: WorkflowId, workflowState: WorkflowState): Unit = { + val metadataEventMsg = MetadataEvent(MetadataKey(workflowId, None, WorkflowMetadataKeys.Status), + MetadataValue(workflowState)) + serviceRegistryActor ! 
PutMetadataAction(metadataEventMsg) + } + +} diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCache.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCache.scala index 8c5331c42..674b1ee88 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCache.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCache.scala @@ -1,7 +1,9 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching +import java.nio.file.Path + import cats.data.NonEmptyList -import cromwell.backend.BackendJobExecutionActor.SucceededResponse +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse import cromwell.core.ExecutionIndex.IndexEnhancedIndex import cromwell.core.WorkflowId import cromwell.core.callcaching.HashResult @@ -19,7 +21,7 @@ final case class CallCachingEntryId(id: Int) * Given a database-layer CallCacheStore, this accessor can access the database with engine-friendly data types. 
*/ class CallCache(database: CallCachingSqlDatabase) { - def addToCache(workflowId: WorkflowId, callCacheHashes: CallCacheHashes, response: SucceededResponse)(implicit ec: ExecutionContext): Future[Unit] = { + def addToCache(workflowId: WorkflowId, callCacheHashes: CallCacheHashes, response: JobSucceededResponse)(implicit ec: ExecutionContext): Future[Unit] = { val metaInfo = CallCachingEntry( workflowExecutionUuid = workflowId.toString, callFullyQualifiedName = response.jobKey.call.fullyQualifiedName, @@ -35,7 +37,7 @@ class CallCache(database: CallCachingSqlDatabase) { } private def addToCache(callCachingEntry: CallCachingEntry, hashes: Set[HashResult], - result: Iterable[WdlValueSimpleton], jobDetritus: Map[String, String]) + result: Iterable[WdlValueSimpleton], jobDetritus: Map[String, Path]) (implicit ec: ExecutionContext): Future[Unit] = { val hashesToInsert: Iterable[CallCachingHashEntry] = { @@ -51,7 +53,7 @@ class CallCache(database: CallCachingSqlDatabase) { val jobDetritusToInsert: Iterable[CallCachingDetritusEntry] = { jobDetritus map { - case (fileName, filePath) => CallCachingDetritusEntry(fileName, filePath) + case (fileName, filePath) => CallCachingDetritusEntry(fileName, filePath.toUri.toString) } } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheInvalidateActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheInvalidateActor.scala index ef09d32e9..a0ec75fc8 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheInvalidateActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheInvalidateActor.scala @@ -33,4 +33,5 @@ object CallCacheInvalidateActor { sealed trait CallCacheInvalidatedResponse case object CallCacheInvalidatedSuccess extends CallCacheInvalidatedResponse +case object CallCacheInvalidationUnnecessary extends CallCacheInvalidatedResponse case 
class CallCacheInvalidatedFailure(t: Throwable) extends CallCacheInvalidatedResponse \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheWriteActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheWriteActor.scala index f0e9c0186..c6e42b5cc 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheWriteActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/CallCacheWriteActor.scala @@ -2,14 +2,14 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching import akka.actor.{Actor, ActorLogging, Props} import cromwell.backend.BackendJobExecutionActor -import cromwell.backend.BackendJobExecutionActor.SucceededResponse +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse import cromwell.core.WorkflowId import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CallCacheHashes import scala.concurrent.ExecutionContext import scala.util.{Failure, Success} -case class CallCacheWriteActor(callCache: CallCache, workflowId: WorkflowId, callCacheHashes: CallCacheHashes, succeededResponse: BackendJobExecutionActor.SucceededResponse) extends Actor with ActorLogging { +case class CallCacheWriteActor(callCache: CallCache, workflowId: WorkflowId, callCacheHashes: CallCacheHashes, succeededResponse: BackendJobExecutionActor.JobSucceededResponse) extends Actor with ActorLogging { implicit val ec: ExecutionContext = context.dispatcher @@ -30,7 +30,7 @@ case class CallCacheWriteActor(callCache: CallCache, workflowId: WorkflowId, cal } object CallCacheWriteActor { - def props(callCache: CallCache, workflowId: WorkflowId, callCacheHashes: CallCacheHashes, succeededResponse: SucceededResponse): Props = + def props(callCache: CallCache, workflowId: WorkflowId, callCacheHashes: CallCacheHashes, succeededResponse: 
JobSucceededResponse): Props = Props(CallCacheWriteActor(callCache, workflowId, callCacheHashes, succeededResponse)) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala index b4ad358f5..053e8a14e 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActor.scala @@ -65,7 +65,7 @@ case class EngineJobHashingActor(receiver: ActorRef, import cromwell.core.simpleton.WdlValueSimpleton._ - val inputSimpletons = jobDescriptor.inputs.simplify + val inputSimpletons = jobDescriptor.fullyQualifiedInputs.simplify val (fileInputSimpletons, nonFileInputSimpletons) = inputSimpletons partition { case WdlValueSimpleton(_, f: WdlFile) => true case _ => false @@ -107,7 +107,7 @@ case class EngineJobHashingActor(receiver: ActorRef, } val outputExpressionHashResults = jobDescriptor.call.task.outputs map { output => - HashResult(HashKey(s"output expression: ${output.wdlType.toWdlString} ${output.name}"), output.requiredExpression.valueString.md5HashValue) + HashResult(HashKey(s"output expression: ${output.wdlType.toWdlString} ${output.unqualifiedName}"), output.requiredExpression.valueString.md5HashValue) } // Build these all together for the final set of initial hashes: diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/package.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/package.scala index 1d3eedd9f..d0350e662 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/package.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/execution/package.scala @@ -1,10 +1,12 @@ package cromwell.engine.workflow.lifecycle +import akka.actor.ActorRef import 
wdl4s._ -package object execution { - def splitFqn(fullyQualifiedName: FullyQualifiedName): (String, String) = { - val lastIndex = fullyQualifiedName.lastIndexOf(".") - (fullyQualifiedName.substring(0, lastIndex), fullyQualifiedName.substring(lastIndex + 1)) - } +package execution { + + import cromwell.core.CallKey + + final case class JobRunning(key: CallKey, inputs: EvaluatedTaskInputs, executionActor: Option[ActorRef]) + final case class JobStarting(callKey: CallKey) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActor.scala b/engine/src/main/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActor.scala index b2afd6b5e..99d10221d 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/tokens/JobExecutionTokenDispenserActor.scala @@ -84,7 +84,7 @@ class JobExecutionTokenDispenserActor extends Actor with ActorLogging { private def onTerminate(terminee: ActorRef): Unit = { tokenAssignments.get(terminee) match { case Some(token) => - log.error("Actor {} stopped without returning its Job Execution Token. Reclaiming it!", terminee) + log.debug("Actor {} stopped without returning its Job Execution Token. Reclaiming it!", terminee) self.tell(msg = JobExecutionTokenReturn(token), sender = terminee) case None => log.debug("Actor {} stopped while we were still watching it... but it doesn't have a token. 
Removing it from any queues if necessary", terminee) diff --git a/engine/src/test/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala similarity index 88% rename from engine/src/test/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala rename to engine/src/main/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala index a24be2d32..0f04212f8 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/InMemoryWorkflowStore.scala @@ -1,7 +1,7 @@ package cromwell.engine.workflow.workflowstore import cats.data.NonEmptyList -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState import scala.concurrent.{ExecutionContext, Future} @@ -14,7 +14,7 @@ class InMemoryWorkflowStore extends WorkflowStore { * Adds the requested WorkflowSourceFiles to the store and returns a WorkflowId for each one (in order) * for tracking purposes. 
*/ - override def add(sources: NonEmptyList[WorkflowSourceFiles])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] = { + override def add(sources: NonEmptyList[WorkflowSourceFilesCollection])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] = { val submittedWorkflows = sources map { SubmittedWorkflow(WorkflowId.randomId(), _, WorkflowStoreState.Submitted) } workflowStore = workflowStore ++ submittedWorkflows.toList Future.successful(submittedWorkflows map { _.id }) @@ -44,7 +44,7 @@ class InMemoryWorkflowStore extends WorkflowStore { override def initialize(implicit ec: ExecutionContext): Future[Unit] = Future.successful(()) } -final case class SubmittedWorkflow(id: WorkflowId, sources: WorkflowSourceFiles, state: WorkflowStoreState) { +final case class SubmittedWorkflow(id: WorkflowId, sources: WorkflowSourceFilesCollection, state: WorkflowStoreState) { def toWorkflowToStart: WorkflowToStart = { state match { case r: StartableState => WorkflowToStart(id, sources, r) diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStore.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStore.scala index 7056137c3..29190617e 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStore.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/SqlWorkflowStore.scala @@ -1,9 +1,12 @@ package cromwell.engine.workflow.workflowstore import java.time.OffsetDateTime +import javax.sql.rowset.serial.SerialBlob import cats.data.NonEmptyList -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import com.typesafe.config.ConfigFactory +import net.ceedubs.ficus.Ficus._ +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.database.sql.SqlConverters._ import cromwell.database.sql.WorkflowStoreSqlDatabase import cromwell.database.sql.tables.WorkflowStoreEntry @@ -13,9 +16,13 @@ import scala.concurrent.{ExecutionContext, 
Future} case class SqlWorkflowStore(sqlDatabase: WorkflowStoreSqlDatabase) extends WorkflowStore { override def initialize(implicit ec: ExecutionContext): Future[Unit] = { - sqlDatabase.updateWorkflowState( - WorkflowStoreState.Running.toString, - WorkflowStoreState.Restartable.toString) + if (ConfigFactory.load().as[Option[Boolean]]("system.workflow-restart").getOrElse(true)) { + sqlDatabase.updateWorkflowState( + WorkflowStoreState.Running.toString, + WorkflowStoreState.Restartable.toString) + } else { + Future.successful(()) + } } override def remove(id: WorkflowId)(implicit ec: ExecutionContext): Future[Boolean] = { @@ -36,7 +43,7 @@ case class SqlWorkflowStore(sqlDatabase: WorkflowStoreSqlDatabase) extends Workf * Adds the requested WorkflowSourceFiles to the store and returns a WorkflowId for each one (in order) * for tracking purposes. */ - override def add(sources: NonEmptyList[WorkflowSourceFiles])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] = { + override def add(sources: NonEmptyList[WorkflowSourceFilesCollection])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] = { val asStoreEntries = sources map toWorkflowStoreEntry val returnValue = asStoreEntries map { workflowStore => WorkflowId.fromString(workflowStore.workflowExecutionUuid) } @@ -46,24 +53,27 @@ case class SqlWorkflowStore(sqlDatabase: WorkflowStoreSqlDatabase) extends Workf } private def fromWorkflowStoreEntry(workflowStoreEntry: WorkflowStoreEntry): WorkflowToStart = { - val sources = WorkflowSourceFiles( + val sources = WorkflowSourceFilesCollection( workflowStoreEntry.workflowDefinition.toRawString, workflowStoreEntry.workflowInputs.toRawString, - workflowStoreEntry.workflowOptions.toRawString) + workflowStoreEntry.workflowOptions.toRawString, + workflowStoreEntry.importsZipFile.map(b => b.getBytes(1, b.length.asInstanceOf[Int])) + ) WorkflowToStart( WorkflowId.fromString(workflowStoreEntry.workflowExecutionUuid), sources, 
fromDbStateStringToStartableState(workflowStoreEntry.workflowState)) } - private def toWorkflowStoreEntry(workflowSourceFiles: WorkflowSourceFiles): WorkflowStoreEntry = { + private def toWorkflowStoreEntry(workflowSourceFiles: WorkflowSourceFilesCollection): WorkflowStoreEntry = { WorkflowStoreEntry( WorkflowId.randomId().toString, workflowSourceFiles.wdlSource.toClob, workflowSourceFiles.inputsJson.toClob, workflowSourceFiles.workflowOptionsJson.toClob, WorkflowStoreState.Submitted.toString, - OffsetDateTime.now.toSystemTimestamp + OffsetDateTime.now.toSystemTimestamp, + workflowSourceFiles.importsZipFileOption.map(new SerialBlob(_)) ) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStore.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStore.scala index e3d7b44be..f4734f7bb 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStore.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStore.scala @@ -1,7 +1,7 @@ package cromwell.engine.workflow.workflowstore import cats.data.NonEmptyList -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState import scala.concurrent.{ExecutionContext, Future} @@ -14,7 +14,7 @@ trait WorkflowStore { * Adds the requested WorkflowSourceFiles to the store and returns a WorkflowId for each one (in order) * for tracking purposes. 
*/ - def add(sources: NonEmptyList[WorkflowSourceFiles])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] + def add(sources: NonEmptyList[WorkflowSourceFilesCollection])(implicit ec: ExecutionContext): Future[NonEmptyList[WorkflowId]] /** * Retrieves up to n workflows which have not already been pulled into the engine and sets their pickedUp diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreActor.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreActor.scala index 24cb3a6a7..2ecccbce2 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/WorkflowStoreActor.scala @@ -4,7 +4,7 @@ import java.time.OffsetDateTime import akka.actor.{ActorLogging, ActorRef, LoggingFSM, Props} import cats.data.NonEmptyList -import cromwell.core.{WorkflowId, WorkflowMetadataKeys, WorkflowSourceFiles} +import cromwell.core._ import cromwell.engine.workflow.WorkflowManagerActor import cromwell.engine.workflow.WorkflowManagerActor.WorkflowNotFoundException import cromwell.engine.workflow.workflowstore.WorkflowStoreActor._ @@ -12,9 +12,10 @@ import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState import cromwell.services.metadata.MetadataService.{MetadataPutAcknowledgement, PutMetadataAction} import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} import org.apache.commons.lang3.exception.ExceptionUtils +import wdl4s.util.TryUtil import scala.concurrent.{ExecutionContext, Future} -import scala.util.{Failure, Success} +import scala.util.{Failure, Success, Try} case class WorkflowStoreActor(store: WorkflowStore, serviceRegistryActor: ActorRef) extends LoggingFSM[WorkflowStoreActorState, WorkflowStoreActorData] with ActorLogging { @@ -69,14 +70,14 @@ case class WorkflowStoreActor(store: WorkflowStore, serviceRegistryActor: ActorR private def 
startNewWork(command: WorkflowStoreActorCommand, sndr: ActorRef, nextData: WorkflowStoreActorData) = { val work: Future[Any] = command match { case cmd @ SubmitWorkflow(sourceFiles) => - store.add(NonEmptyList.of(sourceFiles)) map { ids => + storeWorkflowSources(NonEmptyList.of(sourceFiles)) map { ids => val id = ids.head registerSubmissionWithMetadataService(id, sourceFiles) sndr ! WorkflowSubmittedToStore(id) log.info("Workflow {} submitted.", id) } case cmd @ BatchSubmitWorkflows(sources) => - store.add(sources) map { ids => + storeWorkflowSources(sources) map { ids => val assignedSources = ids.toList.zip(sources.toList) assignedSources foreach { case (id, sourceFiles) => registerSubmissionWithMetadataService(id, sourceFiles) } sndr ! WorkflowsBatchSubmittedToStore(ids) @@ -116,6 +117,37 @@ case class WorkflowStoreActor(store: WorkflowStore, serviceRegistryActor: ActorR goto(Working) using nextData } + private def storeWorkflowSources(sources: NonEmptyList[WorkflowSourceFilesCollection]): Future[NonEmptyList[WorkflowId]] = { + for { + processedSources <- Future.fromTry(processSources(sources, _.asPrettyJson)) + workflowIds <- store.add(processedSources) + } yield workflowIds + } + + private def processSources(sources: NonEmptyList[WorkflowSourceFilesCollection], + processOptions: WorkflowOptions => WorkflowOptionsJson): + Try[NonEmptyList[WorkflowSourceFilesCollection]] = { + val nelTries: NonEmptyList[Try[WorkflowSourceFilesCollection]] = sources map processSource(processOptions) + val seqTries: Seq[Try[WorkflowSourceFilesCollection]] = nelTries.toList + val trySeqs: Try[Seq[WorkflowSourceFilesCollection]] = TryUtil.sequence(seqTries) + val tryNel: Try[NonEmptyList[WorkflowSourceFilesCollection]] = trySeqs.map(seq => NonEmptyList.fromList(seq.toList).get) + tryNel + } + + /** + * Runs processing on workflow source files before they are stored. 
+ * + * @param processOptions How to process the workflow options + * @param source Original workflow source + * @return Attempted updated workflow source + */ + private def processSource(processOptions: WorkflowOptions => WorkflowOptionsJson) + (source: WorkflowSourceFilesCollection): Try[WorkflowSourceFilesCollection] = { + for { + processedWorkflowOptions <- WorkflowOptions.fromJsonString(source.workflowOptionsJson) + } yield source.copyOptions(processOptions(processedWorkflowOptions)) + } + private def addWorkCompletionHooks[A](command: WorkflowStoreActorCommand, work: Future[A]) = { work.onComplete { case Success(_) => @@ -152,7 +184,9 @@ case class WorkflowStoreActor(store: WorkflowStore, serviceRegistryActor: ActorR /** * Takes the workflow id and sends it over to the metadata service w/ default empty values for inputs/outputs */ - private def registerSubmissionWithMetadataService(id: WorkflowId, sourceFiles: WorkflowSourceFiles): Unit = { + private def registerSubmissionWithMetadataService(id: WorkflowId, originalSourceFiles: WorkflowSourceFilesCollection): Unit = { + val sourceFiles = processSource(_.clearEncryptedValues)(originalSourceFiles).get + val submissionEvents = List( MetadataEvent(MetadataKey(id, None, WorkflowMetadataKeys.SubmissionTime), MetadataValue(OffsetDateTime.now.toString)), MetadataEvent.empty(MetadataKey(id, None, WorkflowMetadataKeys.Inputs)), @@ -186,8 +220,8 @@ object WorkflowStoreActor { private[workflowstore] case object Idle extends WorkflowStoreActorState sealed trait WorkflowStoreActorCommand - final case class SubmitWorkflow(source: WorkflowSourceFiles) extends WorkflowStoreActorCommand - final case class BatchSubmitWorkflows(sources: NonEmptyList[WorkflowSourceFiles]) extends WorkflowStoreActorCommand + final case class SubmitWorkflow(source: WorkflowSourceFilesCollection) extends WorkflowStoreActorCommand + final case class BatchSubmitWorkflows(sources: NonEmptyList[WorkflowSourceFilesCollection]) extends 
WorkflowStoreActorCommand final case class FetchRunnableWorkflows(n: Int) extends WorkflowStoreActorCommand final case class RemoveWorkflow(id: WorkflowId) extends WorkflowStoreActorCommand final case class AbortWorkflow(id: WorkflowId, manager: ActorRef) extends WorkflowStoreActorCommand diff --git a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/workflowstore_.scala b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/workflowstore_.scala index 0d9481c47..61bc37ee0 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/workflowstore/workflowstore_.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/workflowstore/workflowstore_.scala @@ -1,6 +1,6 @@ package cromwell.engine.workflow.workflowstore -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.workflow.workflowstore.WorkflowStoreState.StartableState sealed trait WorkflowStoreState {def isStartable: Boolean} @@ -12,4 +12,4 @@ object WorkflowStoreState { case object Restartable extends StartableState } -final case class WorkflowToStart(id: WorkflowId, sources: WorkflowSourceFiles, state: StartableState) +final case class WorkflowToStart(id: WorkflowId, sources: WorkflowSourceFilesCollection, state: StartableState) diff --git a/engine/src/main/scala/cromwell/jobstore/EmptyJobStoreActor.scala b/engine/src/main/scala/cromwell/jobstore/EmptyJobStoreActor.scala new file mode 100644 index 000000000..a2eb585a3 --- /dev/null +++ b/engine/src/main/scala/cromwell/jobstore/EmptyJobStoreActor.scala @@ -0,0 +1,15 @@ +package cromwell.jobstore + +import akka.actor.{Actor, Props} +import cromwell.jobstore.JobStoreActor._ + +class EmptyJobStoreActor extends Actor { + override def receive: Receive = { + case w: JobStoreWriterCommand => sender ! JobStoreWriteSuccess(w) + case _: QueryJobCompletion => sender ! 
JobNotComplete + } +} + +object EmptyJobStoreActor { + def props: Props = Props(new EmptyJobStoreActor()) +} diff --git a/engine/src/main/scala/cromwell/jobstore/jobstore_.scala b/engine/src/main/scala/cromwell/jobstore/jobstore_.scala index 7fbdd0107..921183d35 100644 --- a/engine/src/main/scala/cromwell/jobstore/jobstore_.scala +++ b/engine/src/main/scala/cromwell/jobstore/jobstore_.scala @@ -5,6 +5,6 @@ import cromwell.core.{WorkflowId, _} case class JobStoreKey(workflowId: WorkflowId, callFqn: String, index: Option[Int], attempt: Int) sealed trait JobResult -case class JobResultSuccess(returnCode: Option[Int], jobOutputs: JobOutputs) extends JobResult +case class JobResultSuccess(returnCode: Option[Int], jobOutputs: CallOutputs) extends JobResult case class JobResultFailure(returnCode: Option[Int], reason: Throwable, retryable: Boolean) extends JobResult diff --git a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala index cc0f5f4aa..37dea8133 100644 --- a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala +++ b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala @@ -13,6 +13,7 @@ import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor import cromwell.engine.workflow.workflowstore.{SqlWorkflowStore, WorkflowStore, WorkflowStoreActor} import cromwell.jobstore.{JobStore, JobStoreActor, SqlJobStore} import cromwell.services.{ServiceRegistryActor, SingletonServicesStore} +import cromwell.subworkflowstore.{SqlSubWorkflowStore, SubWorkflowStoreActor} import net.ceedubs.ficus.Ficus._ /** * An actor which serves as the lord protector for the rest of Cromwell, allowing us to have more fine grain @@ -29,6 +30,7 @@ import net.ceedubs.ficus.Ficus._ private val logger = Logging(context.system, this) private val config = ConfigFactory.load() + val serverMode: Boolean lazy val serviceRegistryActor: ActorRef = context.actorOf(ServiceRegistryActor.props(config), 
"ServiceRegistryActor") lazy val numberOfWorkflowLogCopyWorkers = config.getConfig("system").as[Option[Int]]("number-of-workflow-log-copy-workers").getOrElse(DefaultNumberOfWorkflowLogCopyWorkers) @@ -44,6 +46,9 @@ import net.ceedubs.ficus.Ficus._ lazy val jobStore: JobStore = new SqlJobStore(SingletonServicesStore.databaseInterface) lazy val jobStoreActor = context.actorOf(JobStoreActor.props(jobStore), "JobStoreActor") + lazy val subWorkflowStore = new SqlSubWorkflowStore(SingletonServicesStore.databaseInterface) + lazy val subWorkflowStoreActor = context.actorOf(SubWorkflowStoreActor.props(subWorkflowStore), "SubWorkflowStoreActor") + lazy val callCache: CallCache = new CallCache(SingletonServicesStore.databaseInterface) lazy val callCacheReadActor = context.actorOf(RoundRobinPool(25) .props(CallCacheReadActor.props(callCache)), @@ -56,9 +61,12 @@ import net.ceedubs.ficus.Ficus._ lazy val jobExecutionTokenDispenserActor = context.actorOf(JobExecutionTokenDispenserActor.props) + def abortJobsOnTerminate: Boolean + lazy val workflowManagerActor = context.actorOf( WorkflowManagerActor.props( - workflowStoreActor, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, callCacheReadActor, jobExecutionTokenDispenserActor, backendSingletonCollection), + workflowStoreActor, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, + jobExecutionTokenDispenserActor, backendSingletonCollection, abortJobsOnTerminate, serverMode), "WorkflowManagerActor") override def receive = { diff --git a/engine/src/main/scala/cromwell/server/CromwellServer.scala b/engine/src/main/scala/cromwell/server/CromwellServer.scala index dca31a625..36010fb62 100644 --- a/engine/src/main/scala/cromwell/server/CromwellServer.scala +++ b/engine/src/main/scala/cromwell/server/CromwellServer.scala @@ -53,6 +53,9 @@ object CromwellServer { class CromwellServerActor(config: Config) extends CromwellRootActor with CromwellApiService with SwaggerService { 
implicit def executionContext = actorRefFactory.dispatcher + override val serverMode = true + override val abortJobsOnTerminate = false + override def actorRefFactory = context override def receive = handleTimeouts orElse runRoute(possibleRoutes) diff --git a/engine/src/main/scala/cromwell/subworkflowstore/EmptySubWorkflowStoreActor.scala b/engine/src/main/scala/cromwell/subworkflowstore/EmptySubWorkflowStoreActor.scala new file mode 100644 index 000000000..166d7d685 --- /dev/null +++ b/engine/src/main/scala/cromwell/subworkflowstore/EmptySubWorkflowStoreActor.scala @@ -0,0 +1,17 @@ +package cromwell.subworkflowstore + +import akka.actor.{Actor, ActorLogging, Props} +import cromwell.subworkflowstore.SubWorkflowStoreActor._ + +class EmptySubWorkflowStoreActor extends Actor with ActorLogging { + override def receive: Receive = { + case register: RegisterSubWorkflow => sender() ! SubWorkflowStoreRegisterSuccess(register) + case query: QuerySubWorkflow => sender() ! SubWorkflowNotFound(query) + case complete: WorkflowComplete => sender() ! 
SubWorkflowStoreCompleteSuccess(complete) + case unknown => log.error(s"SubWorkflowStoreActor received unknown message: $unknown") + } +} + +object EmptySubWorkflowStoreActor { + def props: Props = Props(new EmptySubWorkflowStoreActor()) +} diff --git a/engine/src/main/scala/cromwell/subworkflowstore/SqlSubWorkflowStore.scala b/engine/src/main/scala/cromwell/subworkflowstore/SqlSubWorkflowStore.scala new file mode 100644 index 000000000..64f21275f --- /dev/null +++ b/engine/src/main/scala/cromwell/subworkflowstore/SqlSubWorkflowStore.scala @@ -0,0 +1,31 @@ +package cromwell.subworkflowstore +import cromwell.database.sql.SubWorkflowStoreSqlDatabase +import cromwell.database.sql.tables.SubWorkflowStoreEntry + +import scala.concurrent.{ExecutionContext, Future} + +class SqlSubWorkflowStore(subWorkflowStoreSqlDatabase: SubWorkflowStoreSqlDatabase) extends SubWorkflowStore { + override def addSubWorkflowStoreEntry(rootWorkflowExecutionUuid: String, + parentWorkflowExecutionUuid: String, + callFullyQualifiedName: String, + jobIndex: Int, + jobAttempt: Int, + subWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Unit] = { + subWorkflowStoreSqlDatabase.addSubWorkflowStoreEntry( + rootWorkflowExecutionUuid, + parentWorkflowExecutionUuid, + callFullyQualifiedName, + jobIndex, + jobAttempt, + subWorkflowExecutionUuid + ) + } + + override def querySubWorkflowStore(parentWorkflowExecutionUuid: String, callFqn: String, jobIndex: Int, jobAttempt: Int)(implicit ec: ExecutionContext): Future[Option[SubWorkflowStoreEntry]] = { + subWorkflowStoreSqlDatabase.querySubWorkflowStore(parentWorkflowExecutionUuid, callFqn, jobIndex, jobAttempt) + } + + override def removeSubWorkflowStoreEntries(parentWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Int] = { + subWorkflowStoreSqlDatabase.removeSubWorkflowStoreEntries(parentWorkflowExecutionUuid) + } +} diff --git a/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStore.scala 
b/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStore.scala new file mode 100644 index 000000000..8ad92fa9b --- /dev/null +++ b/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStore.scala @@ -0,0 +1,19 @@ +package cromwell.subworkflowstore + +import cromwell.database.sql.tables.SubWorkflowStoreEntry + +import scala.concurrent.{ExecutionContext, Future} + +trait SubWorkflowStore { + def addSubWorkflowStoreEntry(rootWorkflowExecutionUuid: String, + parentWorkflowExecutionUuid: String, + callFullyQualifiedName: String, + jobIndex: Int, + jobAttempt: Int, + subWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Unit] + + def querySubWorkflowStore(parentWorkflowExecutionUuid: String, callFqn: String, jobIndex: Int, jobAttempt: Int) + (implicit ec: ExecutionContext): Future[Option[SubWorkflowStoreEntry]] + + def removeSubWorkflowStoreEntries(parentWorkflowExecutionUuid: String)(implicit ec: ExecutionContext): Future[Int] +} diff --git a/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStoreActor.scala b/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStoreActor.scala new file mode 100644 index 000000000..cf7624087 --- /dev/null +++ b/engine/src/main/scala/cromwell/subworkflowstore/SubWorkflowStoreActor.scala @@ -0,0 +1,72 @@ +package cromwell.subworkflowstore + +import akka.actor.{Actor, ActorLogging, ActorRef, Props} +import cromwell.core.ExecutionIndex._ +import cromwell.core.{JobKey, WorkflowId} +import cromwell.database.sql.tables.SubWorkflowStoreEntry +import cromwell.subworkflowstore.SubWorkflowStoreActor._ + +import scala.concurrent.ExecutionContext +import scala.util.{Failure, Success} + +class SubWorkflowStoreActor(database: SubWorkflowStore) extends Actor with ActorLogging { + + implicit val ec: ExecutionContext = context.dispatcher + + override def receive = { + case register: RegisterSubWorkflow => registerSubWorkflow(sender(), register) + case query: QuerySubWorkflow => querySubWorkflow(sender(), 
query) + case complete: WorkflowComplete => workflowComplete(sender(), complete) + case unknown => log.error(s"SubWorkflowStoreActor received unknown message: $unknown") + } + + private def registerSubWorkflow(replyTo: ActorRef, command: RegisterSubWorkflow) = { + database.addSubWorkflowStoreEntry( + command.rootWorkflowExecutionUuid.toString, + command.parentWorkflowExecutionUuid.toString, + command.jobKey.scope.fullyQualifiedName, + command.jobKey.index.fromIndex, + command.jobKey.attempt, + command.subWorkflowExecutionUuid.toString + ) onComplete { + case Success(_) => replyTo ! SubWorkflowStoreRegisterSuccess(command) + case Failure(ex) => replyTo ! SubWorkflowStoreFailure(command, ex) + } + } + + private def querySubWorkflow(replyTo: ActorRef, command: QuerySubWorkflow) = { + val jobKey = command.jobKey + database.querySubWorkflowStore(command.parentWorkflowExecutionUuid.toString, jobKey.scope.fullyQualifiedName, jobKey.index.fromIndex, jobKey.attempt) onComplete { + case Success(Some(result)) => replyTo ! SubWorkflowFound(result) + case Success(None) => replyTo ! SubWorkflowNotFound(command) + case Failure(ex) => replyTo ! SubWorkflowStoreFailure(command, ex) + } + } + + private def workflowComplete(replyTo: ActorRef, command: WorkflowComplete) = { + database.removeSubWorkflowStoreEntries(command.workflowExecutionUuid.toString) onComplete { + case Success(_) => replyTo ! SubWorkflowStoreCompleteSuccess(command) + case Failure(ex) => replyTo ! 
SubWorkflowStoreFailure(command, ex) + } + } + +} + +object SubWorkflowStoreActor { + sealed trait SubWorkflowStoreActorCommand + case class RegisterSubWorkflow(rootWorkflowExecutionUuid: WorkflowId, parentWorkflowExecutionUuid: WorkflowId, jobKey: JobKey, subWorkflowExecutionUuid: WorkflowId) extends SubWorkflowStoreActorCommand + case class QuerySubWorkflow(parentWorkflowExecutionUuid: WorkflowId, jobKey: JobKey) extends SubWorkflowStoreActorCommand + case class WorkflowComplete(workflowExecutionUuid: WorkflowId) extends SubWorkflowStoreActorCommand + + sealed trait SubWorkflowStoreActorResponse + case class SubWorkflowStoreRegisterSuccess(command: RegisterSubWorkflow) extends SubWorkflowStoreActorResponse + case class SubWorkflowFound(subWorkflowStoreEntry: SubWorkflowStoreEntry) extends SubWorkflowStoreActorResponse + case class SubWorkflowNotFound(command: QuerySubWorkflow) extends SubWorkflowStoreActorResponse + case class SubWorkflowStoreCompleteSuccess(command: SubWorkflowStoreActorCommand) extends SubWorkflowStoreActorResponse + + case class SubWorkflowStoreFailure(command: SubWorkflowStoreActorCommand, failure: Throwable) extends SubWorkflowStoreActorResponse + + def props(database: SubWorkflowStore) = Props( + new SubWorkflowStoreActor(database) + ) +} diff --git a/engine/src/main/scala/cromwell/webservice/CromwellApiHandler.scala b/engine/src/main/scala/cromwell/webservice/CromwellApiHandler.scala index a2441abb4..76ec94d77 100644 --- a/engine/src/main/scala/cromwell/webservice/CromwellApiHandler.scala +++ b/engine/src/main/scala/cromwell/webservice/CromwellApiHandler.scala @@ -21,8 +21,8 @@ object CromwellApiHandler { sealed trait ApiHandlerMessage - final case class ApiHandlerWorkflowSubmit(source: WorkflowSourceFiles) extends ApiHandlerMessage - final case class ApiHandlerWorkflowSubmitBatch(sources: NonEmptyList[WorkflowSourceFiles]) extends ApiHandlerMessage + final case class ApiHandlerWorkflowSubmit(source: WorkflowSourceFilesCollection) extends 
ApiHandlerMessage + final case class ApiHandlerWorkflowSubmitBatch(sources: NonEmptyList[WorkflowSourceFilesCollection]) extends ApiHandlerMessage final case class ApiHandlerWorkflowQuery(uri: Uri, parameters: Seq[(String, String)]) extends ApiHandlerMessage final case class ApiHandlerWorkflowStatus(id: WorkflowId) extends ApiHandlerMessage final case class ApiHandlerWorkflowOutputs(id: WorkflowId) extends ApiHandlerMessage @@ -66,7 +66,9 @@ class CromwellApiHandler(requestHandlerActor: ActorRef) extends Actor with Workf case WorkflowStoreActor.WorkflowSubmittedToStore(id) => context.parent ! RequestComplete((StatusCodes.Created, WorkflowSubmitResponse(id.toString, WorkflowSubmitted.toString))) - case ApiHandlerWorkflowSubmitBatch(sources) => requestHandlerActor ! WorkflowStoreActor.BatchSubmitWorkflows(sources) + case ApiHandlerWorkflowSubmitBatch(sources) => requestHandlerActor ! + WorkflowStoreActor.BatchSubmitWorkflows(sources.map(x => WorkflowSourceFilesWithoutImports(x.wdlSource, x.inputsJson, x.workflowOptionsJson))) + case WorkflowStoreActor.WorkflowsBatchSubmittedToStore(ids) => val responses = ids map { id => WorkflowSubmitResponse(id.toString, WorkflowSubmitted.toString) } diff --git a/engine/src/main/scala/cromwell/webservice/CromwellApiService.scala b/engine/src/main/scala/cromwell/webservice/CromwellApiService.scala index 54ddf2786..34fff945b 100644 --- a/engine/src/main/scala/cromwell/webservice/CromwellApiService.scala +++ b/engine/src/main/scala/cromwell/webservice/CromwellApiService.scala @@ -1,9 +1,9 @@ package cromwell.webservice import akka.actor._ -import java.lang.Throwable + import cats.data.NonEmptyList -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.core.{WorkflowId, WorkflowOptionsJson, WorkflowSourceFilesCollection} import cromwell.engine.backend.BackendConfiguration import cromwell.services.metadata.MetadataService._ import cromwell.webservice.WorkflowJsonSupport._ @@ -14,6 +14,9 @@ import spray.http._ import 
spray.httpx.SprayJsonSupport._ import spray.json._ import spray.routing._ +import wdl4s.{WdlJson, WdlSource} + +import scala.util.{Failure, Success, Try} trait SwaggerService extends SwaggerUiResourceHttpService { override def swaggerServiceName = "cromwell" @@ -50,8 +53,8 @@ trait CromwellApiService extends HttpService with PerRequestCreator { perRequest(requestContext, metadataBuilderProps, message) } - private def failBadRequest(exception: Exception, statusCode: StatusCode = StatusCodes.BadRequest) = respondWithMediaType(`application/json`) { - complete((statusCode, APIResponse.fail(exception).toJson.prettyPrint)) + private def failBadRequest(t: Throwable, statusCode: StatusCode = StatusCodes.BadRequest) = respondWithMediaType(`application/json`) { + complete((statusCode, APIResponse.fail(t).toJson.prettyPrint)) } val workflowRoutes = queryRoute ~ queryPostRoute ~ workflowOutputsRoute ~ submitRoute ~ submitBatchRoute ~ @@ -124,18 +127,70 @@ trait CromwellApiService extends HttpService with PerRequestCreator { } } + case class PartialWorkflowSources(wdlSource: Option[WdlSource], workflowInputs: Seq[WdlJson], workflowInputsAux: Map[Int, WdlJson], workflowOptions: Option[WorkflowOptionsJson], zippedImports: Option[Array[Byte]]) + object PartialWorkflowSources { + private def workflowInputs(bodyPart: BodyPart): Seq[WdlJson] = { + import spray.json._ + bodyPart.entity.data.asString.parseJson match { + case JsArray(Seq(x, xs@_*)) => (List(x) ++ xs).map(_.compactPrint) + case JsArray(_) => Seq.empty + case v: JsValue => Seq(v.compactPrint) + } + } + + def partialSourcesToSourceCollections(partialSources: Try[PartialWorkflowSources], allowNoInputs: Boolean): Try[Seq[WorkflowSourceFilesCollection]] = { + partialSources flatMap { + case PartialWorkflowSources(Some(wdlSource), workflowInputs, workflowInputsAux, workflowOptions, wdlDependencies) => + //The order of addition allows for the expected override of colliding keys. 
+ val sortedInputAuxes = workflowInputsAux.toSeq.sortBy(_._1).map(x => Option(x._2)) + val wfInputs: Try[Seq[WdlJson]] = if (workflowInputs.isEmpty) { + if (allowNoInputs) Success(Seq("{}")) else Failure(new IllegalArgumentException("No inputs were provided")) + } else Success(workflowInputs map { workflowInputSet => + mergeMaps(Seq(Option(workflowInputSet)) ++ sortedInputAuxes).toString + }) + wfInputs.map(_.map(x => WorkflowSourceFilesCollection(wdlSource, x, workflowOptions.getOrElse("{}"), wdlDependencies))) + case other => Failure(new IllegalArgumentException(s"Incomplete workflow submission: $other")) + } + } + + def fromSubmitRoute(formData: MultipartFormData, allowNoInputs: Boolean): Try[Seq[WorkflowSourceFilesCollection]] = { + val partialSources = Try(formData.fields.foldLeft(PartialWorkflowSources(None, Seq.empty, Map.empty, None, None)) { (partialSources: PartialWorkflowSources, bodyPart: BodyPart) => + if (bodyPart.name.contains("wdlSource")) { + partialSources.copy(wdlSource = Some(bodyPart.entity.data.asString)) + } else if (bodyPart.name.contains("workflowInputs")) { + partialSources.copy(workflowInputs = workflowInputs(bodyPart)) + } else if (bodyPart.name.forall(_.startsWith("workflowInputs_"))) { + val index = bodyPart.name.get.stripPrefix("workflowInputs_").toInt + partialSources.copy(workflowInputsAux = partialSources.workflowInputsAux + (index -> bodyPart.entity.data.asString)) + } else if (bodyPart.name.contains("workflowOptions")) { + partialSources.copy(workflowOptions = Some(bodyPart.entity.data.asString)) + } else if (bodyPart.name.contains("wdlDependencies")) { + partialSources.copy(zippedImports = Some(bodyPart.entity.data.toByteArray)) + } else { + throw new IllegalArgumentException(s"Unexpected body part name: ${bodyPart.name.getOrElse("None")}") + } + }) + partialSourcesToSourceCollections(partialSources, allowNoInputs) + } + } + def submitRoute = path("workflows" / Segment) { version => post { - formFields("wdlSource", 
"workflowInputs".?, "workflowInputs_2".?, "workflowInputs_3".?, - "workflowInputs_4".?, "workflowInputs_5".?, "workflowOptions".?) { - (wdlSource, workflowInputs, workflowInputs_2, workflowInputs_3, workflowInputs_4, workflowInputs_5, workflowOptions) => - requestContext => - //The order of addition allows for the expected override of colliding keys. - val wfInputs = mergeMaps(Seq(workflowInputs, workflowInputs_2, workflowInputs_3, workflowInputs_4, workflowInputs_5)).toString - - val workflowSourceFiles = WorkflowSourceFiles(wdlSource, wfInputs, workflowOptions.getOrElse("{}")) - perRequest(requestContext, CromwellApiHandler.props(workflowStoreActor), CromwellApiHandler.ApiHandlerWorkflowSubmit(workflowSourceFiles)) + entity(as[MultipartFormData]) { formData => + requestContext => { + PartialWorkflowSources.fromSubmitRoute(formData, allowNoInputs = true) match { + case Success(workflowSourceFiles) if workflowSourceFiles.size == 1 => + perRequest(requestContext, CromwellApiHandler.props(workflowStoreActor), CromwellApiHandler.ApiHandlerWorkflowSubmit(workflowSourceFiles.head)) + case Success(workflowSourceFiles) => + failBadRequest(new IllegalArgumentException("To submit more than one workflow at a time, use the batch endpoint.")) + case Failure(t) => + System.err.println(t) + t.printStackTrace(System.err) + failBadRequest(t) + } + () + } } } } @@ -143,19 +198,18 @@ trait CromwellApiService extends HttpService with PerRequestCreator { def submitBatchRoute = path("workflows" / Segment / "batch") { version => post { - formFields("wdlSource", "workflowInputs", "workflowOptions".?) 
{ - (wdlSource, workflowInputs, workflowOptions) => - requestContext => - import spray.json._ - workflowInputs.parseJson match { - case JsArray(Seq(x, xs@_*)) => - val nelInputses = NonEmptyList.of(x, xs: _*) - val sources = nelInputses.map(inputs => WorkflowSourceFiles(wdlSource, inputs.compactPrint, workflowOptions.getOrElse("{}"))) - perRequest(requestContext, CromwellApiHandler.props(workflowStoreActor), CromwellApiHandler.ApiHandlerWorkflowSubmitBatch(sources)) - case JsArray(_) => failBadRequest(new RuntimeException("Nothing was submitted")) - case _ => reject - } - () + entity(as[MultipartFormData]) { formData => + requestContext => { + PartialWorkflowSources.fromSubmitRoute(formData, allowNoInputs = false) match { + case Success(workflowSourceFiles) => + perRequest(requestContext, CromwellApiHandler.props(workflowStoreActor), CromwellApiHandler.ApiHandlerWorkflowSubmitBatch(NonEmptyList.fromListUnsafe(workflowSourceFiles.toList))) + case Failure(t) => + System.err.println(t) + t.printStackTrace(System.err) + failBadRequest(t) + } + () + } } } } @@ -183,13 +237,21 @@ trait CromwellApiService extends HttpService with PerRequestCreator { parameterMultiMap { parameters => val includeKeysOption = NonEmptyList.fromList(parameters.getOrElse("includeKey", List.empty)) val excludeKeysOption = NonEmptyList.fromList(parameters.getOrElse("excludeKey", List.empty)) - (includeKeysOption, excludeKeysOption) match { - case (Some(_), Some(_)) => + val expandSubWorkflowsOption = { + parameters.get("expandSubWorkflows") match { + case Some(v :: Nil) => Try(v.toBoolean) + case _ => Success(false) + } + } + + (includeKeysOption, excludeKeysOption, expandSubWorkflowsOption) match { + case (Some(_), Some(_), _) => failBadRequest(new IllegalArgumentException("includeKey and excludeKey may not be specified together")) - case _ => + case (_, _, Success(expandSubWorkflows)) => withRecognizedWorkflowId(possibleWorkflowId) { id => - 
handleMetadataRequest(GetSingleWorkflowMetadataAction(id, includeKeysOption, excludeKeysOption)) + handleMetadataRequest(GetSingleWorkflowMetadataAction(id, includeKeysOption, excludeKeysOption, expandSubWorkflows)) } + case (_, _, Failure(ex)) => failBadRequest(new IllegalArgumentException(ex)) } } } diff --git a/engine/src/main/scala/cromwell/webservice/EngineStatsActor.scala b/engine/src/main/scala/cromwell/webservice/EngineStatsActor.scala index 3b83955a3..047eeccb9 100644 --- a/engine/src/main/scala/cromwell/webservice/EngineStatsActor.scala +++ b/engine/src/main/scala/cromwell/webservice/EngineStatsActor.scala @@ -19,9 +19,10 @@ final case class EngineStatsActor(workflowActors: List[ActorRef], replyTo: Actor private var jobCounts = Map.empty[ActorRef, Int] /* - It's possible that WorkflowActors might disappear behind us and never manage to write us back. - Instead of waiting longingly, watching a mailbox which might never receive some love instead wait - a specified period of time and assume anything which was going to reply already has + * FIXME + * Because of sub workflows there is currently no reliable way to know if we received responses from all running WEAs. + * For now, we always wait for the timeout duration before responding to give a chance to all WEAs to respond (even nested ones). + * This could be improved by having WEAs wait for their sub WEAs before sending back the response. 
*/ val scheduledMsg = context.system.scheduler.scheduleOnce(timeout, self, ShutItDown) @@ -31,7 +32,6 @@ final case class EngineStatsActor(workflowActors: List[ActorRef], replyTo: Actor override def receive = { case JobCount(count) => jobCounts += (sender -> count) - if (jobCounts.size == workflowActors.size) reportStats() case ShutItDown => reportStats() case wompWomp => log.error("Unexpected message to EngineStatsActor: {}", wompWomp) @@ -59,5 +59,5 @@ object EngineStatsActor { final case class EngineStats(workflows: Int, jobs: Int) - val MaxTimeToWait = 30 seconds + val MaxTimeToWait = 3 seconds } diff --git a/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala b/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala index a294c58fc..b2afe311f 100644 --- a/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala +++ b/engine/src/main/scala/cromwell/webservice/WorkflowJsonSupport.scala @@ -1,13 +1,15 @@ package cromwell.webservice +import java.nio.file.Paths import java.time.OffsetDateTime -import cromwell.core.WorkflowSourceFiles +import cromwell.core._ import cromwell.engine._ import cromwell.services.metadata.MetadataService import MetadataService.{WorkflowQueryResponse, WorkflowQueryResult} import cromwell.util.JsonFormatting.WdlValueJsonFormatter import WdlValueJsonFormatter._ +import better.files.File import spray.json.{DefaultJsonProtocol, JsString, JsValue, RootJsonFormat} object WorkflowJsonSupport extends DefaultJsonProtocol { @@ -18,7 +20,17 @@ object WorkflowJsonSupport extends DefaultJsonProtocol { implicit val callOutputResponseProtocol = jsonFormat3(CallOutputResponse) implicit val engineStatsProtocol = jsonFormat2(EngineStatsActor.EngineStats) implicit val callAttempt = jsonFormat2(CallAttempt) - implicit val workflowSourceData = jsonFormat3(WorkflowSourceFiles) + implicit val workflowSourceData = jsonFormat3(WorkflowSourceFilesWithoutImports) + + implicit object fileJsonFormat extends RootJsonFormat[File] { 
+ override def write(obj: File) = JsString(obj.path.toAbsolutePath.toString) + override def read(json: JsValue): File = json match { + case JsString(str) => Paths.get(str) + case unknown => throw new NotImplementedError(s"Cannot parse $unknown to a File") + } + } + + implicit val workflowSourceDataWithImports = jsonFormat4(WorkflowSourceFilesWithDependenciesZip) implicit val errorResponse = jsonFormat3(FailureResponse) implicit val successResponse = jsonFormat3(SuccessResponse) diff --git a/engine/src/main/scala/cromwell/webservice/metadata/IndexedJsonValue.scala b/engine/src/main/scala/cromwell/webservice/metadata/IndexedJsonValue.scala index d9ed77438..f51e64187 100644 --- a/engine/src/main/scala/cromwell/webservice/metadata/IndexedJsonValue.scala +++ b/engine/src/main/scala/cromwell/webservice/metadata/IndexedJsonValue.scala @@ -4,6 +4,7 @@ import java.time.OffsetDateTime import cats.{Monoid, Semigroup} import cats.instances.map._ +import cromwell.services.metadata.CallMetadataKeys import spray.json._ @@ -30,20 +31,33 @@ object IndexedJsonValue { /** Customized version of Json data structure, to account for timestamped values and lazy array creation */ sealed trait TimestampedJsValue { - def toJson: JsValue + def toJson(expandedValues: Map[String, JsValue]): JsValue def timestamp: OffsetDateTime } private case class TimestampedJsList(v: Map[Int, TimestampedJsValue], timestamp: OffsetDateTime) extends TimestampedJsValue { - override val toJson = JsArray(v.values.toVector map { _.toJson }) + override def toJson(expandedValues: Map[String, JsValue]) = JsArray(v.values.toVector map { _.toJson(expandedValues) }) } private case class TimestampedJsObject(v: Map[String, TimestampedJsValue], timestamp: OffsetDateTime) extends TimestampedJsValue { - override val toJson = JsObject(v mapValues { _.toJson }) + override def toJson(expandedValues: Map[String, JsValue]) = { + val mappedValues = v map { + case (key, subWorkflowId: TimestampedJsPrimitive) if key == 
CallMetadataKeys.SubWorkflowId => + val subId = subWorkflowId.v.asInstanceOf[JsString] + expandedValues.get(subId.value) map { subMetadata => + CallMetadataKeys.SubWorkflowMetadata -> subMetadata + } getOrElse { + key -> subWorkflowId.v + } + case (key, value) => key -> value.toJson(expandedValues) + } + + JsObject(mappedValues) + } } private class TimestampedJsPrimitive(val v: JsValue, val timestamp: OffsetDateTime) extends TimestampedJsValue { - override val toJson = v + override def toJson(expandedValues: Map[String, JsValue]) = v } private case class TimestampedEmptyJson(override val timestamp: OffsetDateTime) extends TimestampedJsPrimitive(JsObject(Map.empty[String, JsValue]), timestamp) \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala b/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala index 0653be425..272e94c75 100644 --- a/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala +++ b/engine/src/main/scala/cromwell/webservice/metadata/MetadataBuilderActor.scala @@ -13,8 +13,8 @@ import cromwell.services.ServiceRegistryActor.ServiceRegistryFailure import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata._ import cromwell.webservice.PerRequest.{RequestComplete, RequestCompleteWithHeaders} -import cromwell.webservice.metadata.MetadataBuilderActor.{Idle, MetadataBuilderActorState, WaitingForMetadataService} -import cromwell.webservice.{APIResponse, WorkflowJsonSupport} +import cromwell.webservice.metadata.MetadataBuilderActor.{Idle, MetadataBuilderActorData, MetadataBuilderActorState, WaitingForMetadataService, WaitingForSubWorkflows} +import cromwell.webservice.{APIResponse, PerRequestCreator, WorkflowJsonSupport} import org.slf4j.LoggerFactory import spray.http.{StatusCodes, Uri} import spray.httpx.SprayJsonSupport._ @@ -29,7 +29,21 @@ object MetadataBuilderActor { sealed trait MetadataBuilderActorState case 
object Idle extends MetadataBuilderActorState case object WaitingForMetadataService extends MetadataBuilderActorState - + case object WaitingForSubWorkflows extends MetadataBuilderActorState + + case class MetadataBuilderActorData( + originalQuery: MetadataQuery, + originalEvents: Seq[MetadataEvent], + subWorkflowsMetadata: Map[String, JsValue], + waitFor: Int + ) { + def withSubWorkflow(id: String, metadata: JsValue) = { + this.copy(subWorkflowsMetadata = subWorkflowsMetadata + ((id, metadata))) + } + + def isComplete = subWorkflowsMetadata.size == waitFor + } + def props(serviceRegistryActor: ActorRef) = { Props(new MetadataBuilderActor(serviceRegistryActor)).withDispatcher(ApiDispatcher) } @@ -138,8 +152,8 @@ object MetadataBuilderActor { events.toList map { e => keyValueToIndexedJson(e.key.key, e.value, e.offsetDateTime) } combineAll } - private def eventsToAttemptMetadata(attempt: Int, events: Seq[MetadataEvent]) = { - val withAttemptField = JsObject(eventsToIndexedJson(events).toJson.asJsObject.fields + (AttemptKey -> JsNumber(attempt))) + private def eventsToAttemptMetadata(expandedValues: Map[String, JsValue])(attempt: Int, events: Seq[MetadataEvent]) = { + val withAttemptField = JsObject(eventsToIndexedJson(events).toJson(expandedValues).asJsObject.fields + (AttemptKey -> JsNumber(attempt))) MetadataForAttempt(attempt, withAttemptField) } @@ -160,10 +174,10 @@ object MetadataBuilderActor { workflowNonStatusEvents ++ sortedStateEvents.headOption.toList } - private def parseWorkflowEventsToTimestampedJsValue(events: Seq[MetadataEvent], includeCallsIfEmpty: Boolean): JsObject = { + private def parseWorkflowEventsToTimestampedJsValue(events: Seq[MetadataEvent], includeCallsIfEmpty: Boolean, expandedValues: Map[String, JsValue]): JsObject = { // Partition if sequence of events in a pair of (Workflow level events, Call level events) val (workflowLevel, callLevel) = events partition { _.key.jobKey.isEmpty } - val foldedWorkflowValues = 
eventsToIndexedJson(reduceWorkflowEvents(workflowLevel)).toJson.asJsObject + val foldedWorkflowValues = eventsToIndexedJson(reduceWorkflowEvents(workflowLevel)).toJson(expandedValues).asJsObject val callsGroupedByFQN = callLevel groupBy { _.key.jobKey.get.callFqn } val callsGroupedByFQNAndIndex = callsGroupedByFQN mapValues { _ groupBy { _.key.jobKey.get.index } } @@ -171,7 +185,7 @@ object MetadataBuilderActor { val callsMap = callsGroupedByFQNAndIndexAndAttempt mapValues { eventsForIndex => eventsForIndex mapValues { eventsForAttempt => - eventsForAttempt map Function.tupled(eventsToAttemptMetadata) + eventsForAttempt map Function.tupled(eventsToAttemptMetadata(expandedValues)) } map { Function.tupled(attemptMetadataToIndexMetadata) } } mapValues { md => JsArray(md.toVector.sortBy(_.index) flatMap { _.metadata }) } @@ -180,13 +194,13 @@ object MetadataBuilderActor { JsObject(foldedWorkflowValues.fields ++ callData) } - private def parseWorkflowEvents(includeCallsIfEmpty: Boolean)(events: Seq[MetadataEvent]): JsObject = parseWorkflowEventsToTimestampedJsValue(events, includeCallsIfEmpty) + private def parseWorkflowEvents(includeCallsIfEmpty: Boolean, expandedValues: Map[String, JsValue])(events: Seq[MetadataEvent]): JsObject = parseWorkflowEventsToTimestampedJsValue(events, includeCallsIfEmpty, expandedValues) /** * Parse a Seq of MetadataEvent into a full Json metadata response. 
*/ - private def parse(events: Seq[MetadataEvent]): JsObject = { - JsObject(events.groupBy(_.key.workflowId.toString) mapValues parseWorkflowEvents(includeCallsIfEmpty = true)) + private def parse(events: Seq[MetadataEvent], expandedValues: Map[String, JsValue]): JsObject = { + JsObject(events.groupBy(_.key.workflowId.toString) mapValues parseWorkflowEvents(includeCallsIfEmpty = true, expandedValues)) } implicit class EnhancedMetadataValue(val value: MetadataValue) extends AnyVal { @@ -194,12 +208,12 @@ object MetadataBuilderActor { } } -class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[MetadataBuilderActorState, Unit] +class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[MetadataBuilderActorState, Option[MetadataBuilderActorData]] with DefaultJsonProtocol with WorkflowQueryPagination { import WorkflowJsonSupport._ - startWith(Idle, ()) + startWith(Idle, None) val tag = self.path.name when(Idle) { @@ -214,9 +228,8 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me } when(WaitingForMetadataService) { - case Event(MetadataLookupResponse(query, metadata), _) => - context.parent ! RequestComplete((StatusCodes.OK, processMetadataResponse(query, metadata))) - allDone + case Event(MetadataLookupResponse(query, metadata), None) => + processMetadataResponse(query, metadata) case Event(StatusLookupResponse(w, status), _) => context.parent ! RequestComplete((StatusCodes.OK, processStatusResponse(w, status))) allDone @@ -225,7 +238,6 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me context.parent ! RequestComplete((StatusCodes.InternalServerError, response)) allDone case Event(WorkflowQuerySuccess(uri: Uri, response, metadata), _) => - import WorkflowJsonSupport._ context.parent ! 
RequestCompleteWithHeaders(response, generateLinkHeaders(uri, metadata):_*) allDone case Event(failure: WorkflowQueryFailure, _) => @@ -235,10 +247,10 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me // Add in an empty output event if there aren't already any output events. val hasOutputs = events exists { _.key.key.startsWith(WorkflowMetadataKeys.Outputs + ":") } val updatedEvents = if (hasOutputs) events else MetadataEvent.empty(MetadataKey(id, None, WorkflowMetadataKeys.Outputs)) +: events - context.parent ! RequestComplete((StatusCodes.OK, workflowMetadataResponse(id, updatedEvents, includeCallsIfEmpty = false))) + context.parent ! RequestComplete((StatusCodes.OK, workflowMetadataResponse(id, updatedEvents, includeCallsIfEmpty = false, Map.empty))) allDone case Event(LogsResponse(w, l), _) => - context.parent ! RequestComplete((StatusCodes.OK, workflowMetadataResponse(w, l, includeCallsIfEmpty = false))) + context.parent ! RequestComplete((StatusCodes.OK, workflowMetadataResponse(w, l, includeCallsIfEmpty = false, Map.empty))) allDone case Event(failure: MetadataServiceFailure, _) => context.parent ! 
RequestComplete((StatusCodes.InternalServerError, APIResponse.error(failure.reason))) @@ -249,14 +261,76 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me context stop self stay() } + + when(WaitingForSubWorkflows) { + case Event(RequestComplete(metadata), Some(data)) => + processSubWorkflowMetadata(metadata, data) + } + + whenUnhandled { + case Event(message, data) => + log.error(s"Received unexpected message $message in state $stateName with data $data") + stay() + } + + def processSubWorkflowMetadata(metadataResponse: Any, data: MetadataBuilderActorData) = { + metadataResponse match { + case (StatusCodes.OK, js: JsObject) => + js.fields.get(WorkflowMetadataKeys.Id) match { + case Some(subId: JsString) => + val newData = data.withSubWorkflow(subId.value, js) + + if (newData.isComplete) { + buildAndStop(data.originalQuery, data.originalEvents, newData.subWorkflowsMetadata) + } else { + stay() using Option(newData) + } + case _ => failAndDie(new RuntimeException("Received unexpected response while waiting for sub workflow metadata.")) + } + case _ => failAndDie(new RuntimeException("Failed to retrieve metadata for a sub workflow.")) + } + } + + def failAndDie(reason: Throwable) = { + context.parent ! RequestComplete((StatusCodes.InternalServerError, APIResponse.error(reason))) + context stop self + stay() + } + + def buildAndStop(query: MetadataQuery, eventsList: Seq[MetadataEvent], expandedValues: Map[String, JsValue]) = { + context.parent ! 
RequestComplete((StatusCodes.OK, processMetadataEvents(query, eventsList, expandedValues))) + allDone + } + + def processMetadataResponse(query: MetadataQuery, eventsList: Seq[MetadataEvent]) = { + if (query.expandSubWorkflows) { + // Scan events for sub workflow ids + val subWorkflowIds = eventsList.collect({ + case MetadataEvent(key, value, _) if key.key.endsWith(CallMetadataKeys.SubWorkflowId) => value map { _.value } + }).flatten + + // If none is found just proceed to build metadata + if (subWorkflowIds.isEmpty) buildAndStop(query, eventsList, Map.empty) + else { + // Otherwise spin up a metadata builder actor for each sub workflow + subWorkflowIds foreach { subId => + val subMetadataBuilder = context.actorOf(MetadataBuilderActor.props(serviceRegistryActor), PerRequestCreator.endpointActorName) + subMetadataBuilder ! GetMetadataQueryAction(query.copy(workflowId = WorkflowId.fromString(subId))) + } + goto(WaitingForSubWorkflows) using Option(MetadataBuilderActorData(query, eventsList, Map.empty, subWorkflowIds.size)) + } + } else { + buildAndStop(query, eventsList, Map.empty) + } + } - def processMetadataResponse(query: MetadataQuery, eventsList: Seq[MetadataEvent]): JsObject = { + def processMetadataEvents(query: MetadataQuery, eventsList: Seq[MetadataEvent], expandedValues: Map[String, JsValue]): JsObject = { // Should we send back some message ? Or even fail the request instead ? 
if (eventsList.isEmpty) JsObject(Map.empty[String, JsValue]) else { query match { - case MetadataQuery(w, _, _, _, _) => workflowMetadataResponse(w, eventsList) - case _ => MetadataBuilderActor.parse(eventsList) + case MetadataQuery(w, _, _, _, _, _) => workflowMetadataResponse(w, eventsList, includeCallsIfEmpty = true, expandedValues) + case _ => MetadataBuilderActor.parse(eventsList, expandedValues) } } } @@ -268,7 +342,7 @@ class MetadataBuilderActor(serviceRegistryActor: ActorRef) extends LoggingFSM[Me )) } - private def workflowMetadataResponse(workflowId: WorkflowId, eventsList: Seq[MetadataEvent], includeCallsIfEmpty: Boolean = true) = { - JsObject(MetadataBuilderActor.parseWorkflowEvents(includeCallsIfEmpty)(eventsList).fields + ("id" -> JsString(workflowId.toString))) + private def workflowMetadataResponse(workflowId: WorkflowId, eventsList: Seq[MetadataEvent], includeCallsIfEmpty: Boolean, expandedValues: Map[String, JsValue]) = { + JsObject(MetadataBuilderActor.parseWorkflowEvents(includeCallsIfEmpty, expandedValues)(eventsList).fields + ("id" -> JsString(workflowId.toString))) } } diff --git a/engine/src/test/scala/cromwell/ArrayOfArrayCoercionSpec.scala b/engine/src/test/scala/cromwell/ArrayOfArrayCoercionSpec.scala index 00a530f31..537440935 100644 --- a/engine/src/test/scala/cromwell/ArrayOfArrayCoercionSpec.scala +++ b/engine/src/test/scala/cromwell/ArrayOfArrayCoercionSpec.scala @@ -6,14 +6,14 @@ import wdl4s.values.{WdlArray, WdlString} import cromwell.util.SampleWdl -class ArrayOfArrayCoercionSpec extends CromwellTestkitSpec { +class ArrayOfArrayCoercionSpec extends CromwellTestKitSpec { "A workflow that has an Array[Array[File]] input " should { "accept an Array[Array[String]] as the value for the input" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.ArrayOfArrays, eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "wf.subtask.concatenated" -> WdlArray(WdlArrayType(WdlStringType), Seq( + 
"wf_subtask_concatenated" -> WdlArray(WdlArrayType(WdlStringType), Seq( WdlString("foo\nbar\nbaz"), WdlString("third\nfourth") )) diff --git a/engine/src/test/scala/cromwell/ArrayWorkflowSpec.scala b/engine/src/test/scala/cromwell/ArrayWorkflowSpec.scala index 843796c1a..9c9708307 100644 --- a/engine/src/test/scala/cromwell/ArrayWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/ArrayWorkflowSpec.scala @@ -5,15 +5,15 @@ import java.nio.file.Files import akka.testkit._ import better.files._ import cromwell.util.SampleWdl -import wdl4s.NamespaceWithWorkflow +import wdl4s.{ImportResolver, WdlNamespaceWithWorkflow} import wdl4s.expression.NoFunctions import wdl4s.types.{WdlArrayType, WdlFileType, WdlStringType} import wdl4s.values.{WdlArray, WdlFile, WdlInteger, WdlString} -class ArrayWorkflowSpec extends CromwellTestkitSpec { +class ArrayWorkflowSpec extends CromwellTestKitSpec { val tmpDir = Files.createTempDirectory("ArrayWorkflowSpec") - val ns = NamespaceWithWorkflow.load(SampleWdl.ArrayLiteral(tmpDir).wdlSource("")) + val ns = WdlNamespaceWithWorkflow.load(SampleWdl.ArrayLiteral(tmpDir).wdlSource(""), Seq.empty[ImportResolver]) val expectedArray = WdlArray(WdlArrayType(WdlFileType), Seq(WdlFile("f1"), WdlFile("f2"), WdlFile("f3"))) "A task which contains a parameter " should { @@ -22,9 +22,9 @@ class ArrayWorkflowSpec extends CromwellTestkitSpec { sampleWdl = SampleWdl.ArrayIO, eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "wf.count_lines.count" -> WdlInteger(3), - "wf.count_lines_array.count" -> WdlInteger(3), - "wf.serialize.contents" -> WdlString("str1\nstr2\nstr3") + "wf_count_lines_count" -> WdlInteger(3), + "wf_count_lines_array_count" -> WdlInteger(3), + "wf_serialize_contents" -> WdlString("str1\nstr2\nstr3") ) ) } @@ -32,7 +32,7 @@ class ArrayWorkflowSpec extends CromwellTestkitSpec { "A static Array[File] declaration" should { "be a valid declaration" in { - val declaration = 
ns.workflow.declarations.find {_.name == "arr"}.getOrElse { + val declaration = ns.workflow.declarations.find {_.unqualifiedName == "arr"}.getOrElse { fail("Expected declaration 'arr' to be found") } val expression = declaration.expression.getOrElse { @@ -47,14 +47,14 @@ class ArrayWorkflowSpec extends CromwellTestkitSpec { val catTask = ns.findTask("cat").getOrElse { fail("Expected to find task 'cat'") } - val command = catTask.instantiateCommand(Map("files" -> expectedArray), NoFunctions).getOrElse { + val command = catTask.instantiateCommand(catTask.inputsFromMap(Map("cat.files" -> expectedArray)), NoFunctions).getOrElse { fail("Expected instantiation to work") } command shouldEqual "cat -s f1 f2 f3" } "Coerce Array[String] to Array[File] when running the workflow" in { val outputs = Map( - "wf.cat.lines" -> WdlArray(WdlArrayType(WdlStringType), Seq( + "wf_cat_lines" -> WdlArray(WdlArrayType(WdlStringType), Seq( WdlString("line1"), WdlString("line2"), WdlString("line3"), diff --git a/engine/src/test/scala/cromwell/CallCachingWorkflowSpec.scala b/engine/src/test/scala/cromwell/CallCachingWorkflowSpec.scala index cea47fe8d..202dcecf1 100644 --- a/engine/src/test/scala/cromwell/CallCachingWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/CallCachingWorkflowSpec.scala @@ -11,7 +11,7 @@ import wdl4s.types.{WdlArrayType, WdlIntegerType, WdlStringType} import wdl4s.values.{WdlArray, WdlFile, WdlInteger, WdlString} -class CallCachingWorkflowSpec extends CromwellTestkitSpec { +class CallCachingWorkflowSpec extends CromwellTestKitSpec { def cacheHitMessageForCall(name: String) = s"Call Caching: Cache hit. 
Using UUID\\(.{8}\\):$name\\.*" val expectedOutputs = Map( diff --git a/engine/src/test/scala/cromwell/CopyWorkflowOutputsSpec.scala b/engine/src/test/scala/cromwell/CopyWorkflowOutputsSpec.scala index c43346330..b6465a8ac 100644 --- a/engine/src/test/scala/cromwell/CopyWorkflowOutputsSpec.scala +++ b/engine/src/test/scala/cromwell/CopyWorkflowOutputsSpec.scala @@ -9,7 +9,7 @@ import org.scalatest.prop.Tables.Table import scala.language.postfixOps -class CopyWorkflowOutputsSpec extends CromwellTestkitSpec { +class CopyWorkflowOutputsSpec extends CromwellTestKitSpec { "CopyWorkflowOutputsCall" should { "copy workflow outputs" in { @@ -31,7 +31,7 @@ class CopyWorkflowOutputsSpec extends CromwellTestkitSpec { pattern = "transition from FinalizingWorkflowState to WorkflowSucceededState", occurrences = 1), runtime = "", workflowOptions = s""" { "final_workflow_outputs_dir": "$tmpDir" } """, - expectedOutputs = Seq("A.out", "A.out2", "B.outs") map { o => ("wfoutputs." + o) -> CromwellTestkitSpec.AnyValueIsFine } toMap, + expectedOutputs = Seq("A_out", "A_out2", "B_outs") map { o => ("wfoutputs_" + o) -> CromwellTestKitSpec.AnyValueIsFine } toMap, allowOtherOutputs = false ) @@ -64,7 +64,7 @@ class CopyWorkflowOutputsSpec extends CromwellTestkitSpec { pattern = "transition from FinalizingWorkflowState to WorkflowSucceededState", occurrences = 1), runtime = "", workflowOptions = s""" { "final_workflow_outputs_dir": "$tmpDir" } """, - expectedOutputs = Map("wfoutputs.A.outs" -> CromwellTestkitSpec.AnyValueIsFine), + expectedOutputs = Map("wfoutputs_A_outs" -> CromwellTestKitSpec.AnyValueIsFine), allowOtherOutputs = false ) diff --git a/engine/src/test/scala/cromwell/CromwellTestkitSpec.scala b/engine/src/test/scala/cromwell/CromwellTestKitSpec.scala similarity index 94% rename from engine/src/test/scala/cromwell/CromwellTestkitSpec.scala rename to engine/src/test/scala/cromwell/CromwellTestKitSpec.scala index 090d94fd1..c30488027 100644 --- 
a/engine/src/test/scala/cromwell/CromwellTestkitSpec.scala +++ b/engine/src/test/scala/cromwell/CromwellTestKitSpec.scala @@ -2,12 +2,13 @@ package cromwell import java.nio.file.Paths import java.util.UUID +import java.util.concurrent.atomic.AtomicInteger import akka.actor.{Actor, ActorRef, ActorSystem, Props, Terminated} import akka.pattern.ask import akka.testkit._ import com.typesafe.config.{Config, ConfigFactory} -import cromwell.CromwellTestkitSpec._ +import cromwell.CromwellTestKitSpec._ import cromwell.backend._ import cromwell.core._ import cromwell.engine.backend.BackendConfigurationEntry @@ -21,6 +22,7 @@ import cromwell.server.{CromwellRootActor, CromwellSystem} import cromwell.services.ServiceRegistryActor import cromwell.services.metadata.MetadataQuery import cromwell.services.metadata.MetadataService._ +import cromwell.subworkflowstore.EmptySubWorkflowStoreActor import cromwell.util.SampleWdl import cromwell.webservice.PerRequest.RequestComplete import cromwell.webservice.metadata.MetadataBuilderActor @@ -30,7 +32,7 @@ import org.scalatest.time.{Millis, Seconds, Span} import org.scalatest.{BeforeAndAfterAll, Matchers, OneInstancePerTest, WordSpecLike} import spray.http.StatusCode import spray.json._ -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} import wdl4s.types._ import wdl4s.values._ @@ -43,7 +45,7 @@ import scala.util.matching.Regex case class TestBackendLifecycleActorFactory(configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = None override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, @@ -63,7 +65,7 @@ case class TestBackendLifecycleActorFactory(configurationDescriptor: BackendConf case class OutputNotFoundException(outputFqn: String, 
actualOutputs: String) extends RuntimeException(s"Expected output $outputFqn was not found in: '$actualOutputs'") case class LogNotFoundException(log: String) extends RuntimeException(s"Expected log $log was not found") -object CromwellTestkitSpec { +object CromwellTestKitSpec { val ConfigText = """ |akka { @@ -121,9 +123,11 @@ object CromwellTestkitSpec { val TimeoutDuration = 60 seconds + private val testWorkflowManagerSystemCount = new AtomicInteger() + class TestWorkflowManagerSystem extends CromwellSystem { - override protected def systemName: String = "test-system" - override protected def newActorSystem() = ActorSystem(systemName, ConfigFactory.parseString(CromwellTestkitSpec.ConfigText)) + override protected def systemName: String = "test-system-" + testWorkflowManagerSystemCount.incrementAndGet() + override protected def newActorSystem() = ActorSystem(systemName, ConfigFactory.parseString(CromwellTestKitSpec.ConfigText)) /** * Do NOT shut down the test actor system inside the normal flow. * The actor system will be externally shutdown outside the block. @@ -135,18 +139,6 @@ object CromwellTestkitSpec { } /** - * Loans a test actor system. NOTE: This should be run OUTSIDE of a wait block, never within one. - */ - def withTestWorkflowManagerSystem[T](block: CromwellSystem => T): T = { - val testWorkflowManagerSystem = new CromwellTestkitSpec.TestWorkflowManagerSystem - try { - block(testWorkflowManagerSystem) - } finally { - TestKit.shutdownActorSystem(testWorkflowManagerSystem.actorSystem, TimeoutDuration) - } - } - - /** * Wait for exactly one occurrence of the specified info pattern in the specified block. The block is in its own * parameter list for usage syntax reasons. 
*/ @@ -264,9 +256,11 @@ object CromwellTestkitSpec { } class TestCromwellRootActor(config: Config) extends CromwellRootActor { + override val serverMode = true override lazy val serviceRegistryActor = ServiceRegistryActorInstance override lazy val workflowStore = new InMemoryWorkflowStore - def submitWorkflow(sources: WorkflowSourceFiles): WorkflowId = { + override val abortJobsOnTerminate = false + def submitWorkflow(sources: WorkflowSourceFilesWithoutImports): WorkflowId = { val submitMessage = WorkflowStoreActor.SubmitWorkflow(sources) val result = Await.result(workflowStoreActor.ask(submitMessage)(TimeoutDuration), Duration.Inf).asInstanceOf[WorkflowSubmittedToStore].workflowId workflowManagerActor ! RetrieveNewWorkflows @@ -275,12 +269,12 @@ object CromwellTestkitSpec { } } -abstract class CromwellTestkitSpec(val twms: TestWorkflowManagerSystem = new CromwellTestkitSpec.TestWorkflowManagerSystem()) extends TestKit(twms.actorSystem) +abstract class CromwellTestKitSpec(val twms: TestWorkflowManagerSystem = new CromwellTestKitSpec.TestWorkflowManagerSystem()) extends TestKit(twms.actorSystem) with DefaultTimeout with ImplicitSender with WordSpecLike with Matchers with BeforeAndAfterAll with ScalaFutures with OneInstancePerTest with Eventually { override protected def afterAll() = { twms.shutdownTestActorSystem(); () } - implicit val defaultPatience = PatienceConfig(timeout = Span(30, Seconds), interval = Span(100, Millis)) + implicit val defaultPatience = PatienceConfig(timeout = Span(200, Seconds), interval = Span(1000, Millis)) implicit val ec = system.dispatcher val dummyServiceRegistryActor = system.actorOf(Props.empty) @@ -336,7 +330,7 @@ abstract class CromwellTestkitSpec(val twms: TestWorkflowManagerSystem = new Cro config: Config = DefaultConfig, patienceConfig: PatienceConfig = defaultPatience)(implicit ec: ExecutionContext): Map[FullyQualifiedName, WdlValue] = { val rootActor = buildCromwellRootActor(config) - val sources = 
WorkflowSourceFiles(sampleWdl.wdlSource(runtime), sampleWdl.wdlJson, workflowOptions) + val sources = WorkflowSourceFilesWithoutImports(sampleWdl.wdlSource(runtime), sampleWdl.wdlJson, workflowOptions) val workflowId = rootActor.underlyingActor.submitWorkflow(sources) eventually { verifyWorkflowState(rootActor.underlyingActor.serviceRegistryActor, workflowId, terminalState) } (config = patienceConfig, pos = implicitly[org.scalactic.source.Position]) val outcome = getWorkflowOutputsFromMetadata(workflowId, rootActor.underlyingActor.serviceRegistryActor) @@ -382,7 +376,7 @@ abstract class CromwellTestkitSpec(val twms: TestWorkflowManagerSystem = new Cro def getWorkflowMetadata(workflowId: WorkflowId, serviceRegistryActor: ActorRef, key: Option[String] = None)(implicit ec: ExecutionContext): JsObject = { // MetadataBuilderActor sends its response to context.parent, so we can't just use an ask to talk to it here - val message = GetMetadataQueryAction(MetadataQuery(workflowId, None, key, None, None)) + val message = GetMetadataQueryAction(MetadataQuery(workflowId, None, key, None, None, expandSubWorkflows = false)) val parentProbe = TestProbe() TestActorRef(MetadataBuilderActor.props(serviceRegistryActor), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") ! 
message @@ -444,6 +438,10 @@ class AlwaysHappyJobStoreActor extends Actor { } } +object AlwaysHappySubWorkflowStoreActor { + def props: Props = Props(new EmptySubWorkflowStoreActor) +} + object AlwaysHappyJobStoreActor { def props: Props = Props(new AlwaysHappyJobStoreActor) } diff --git a/engine/src/test/scala/cromwell/DeclarationWorkflowSpec.scala b/engine/src/test/scala/cromwell/DeclarationWorkflowSpec.scala index d829b84eb..aeca4777f 100644 --- a/engine/src/test/scala/cromwell/DeclarationWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/DeclarationWorkflowSpec.scala @@ -1,7 +1,7 @@ package cromwell import wdl4s.types.{WdlFileType, WdlStringType} -import wdl4s.{NamespaceWithWorkflow, WorkflowInput} +import wdl4s.{ImportResolver, WdlNamespaceWithWorkflow, WorkflowInput} import cromwell.util.SampleWdl import org.scalatest.{Matchers, WordSpecLike} @@ -9,11 +9,11 @@ import org.scalatest.{Matchers, WordSpecLike} class DeclarationWorkflowSpec extends Matchers with WordSpecLike { "A workflow with declarations in it" should { "compute inputs properly" in { - NamespaceWithWorkflow.load(SampleWdl.DeclarationsWorkflow.wdlSource(runtime="")).workflow.inputs shouldEqual Map( - "two_step.cat.file" -> WorkflowInput("two_step.cat.file", WdlFileType, postfixQuantifier = None), - "two_step.cgrep.str_decl" -> WorkflowInput("two_step.cgrep.str_decl", WdlStringType, postfixQuantifier = None), - "two_step.cgrep.pattern" -> WorkflowInput("two_step.cgrep.pattern", WdlStringType, postfixQuantifier = None), - "two_step.flags_suffix" -> WorkflowInput("two_step.flags_suffix", WdlStringType, postfixQuantifier = None) + WdlNamespaceWithWorkflow.load(SampleWdl.DeclarationsWorkflow.wdlSource(runtime=""), Seq.empty[ImportResolver]).workflow.inputs shouldEqual Map( + "two_step.cat.file" -> WorkflowInput("two_step.cat.file", WdlFileType), + "two_step.cgrep.str_decl" -> WorkflowInput("two_step.cgrep.str_decl", WdlStringType), + "two_step.cgrep.pattern" -> 
WorkflowInput("two_step.cgrep.pattern", WdlStringType), + "two_step.flags_suffix" -> WorkflowInput("two_step.flags_suffix", WdlStringType) ) } } diff --git a/engine/src/test/scala/cromwell/FilePassingWorkflowSpec.scala b/engine/src/test/scala/cromwell/FilePassingWorkflowSpec.scala index 1aaafaf2f..40eb03624 100644 --- a/engine/src/test/scala/cromwell/FilePassingWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/FilePassingWorkflowSpec.scala @@ -6,19 +6,19 @@ import wdl4s.values.{WdlFile, WdlString} import scala.concurrent.duration._ -class FilePassingWorkflowSpec extends CromwellTestkitSpec { +class FilePassingWorkflowSpec extends CromwellTestKitSpec { "A workflow that passes files between tasks" should { "pass files properly" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.FilePassingWorkflow, EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "file_passing.a.out" -> WdlFile("out"), - "file_passing.a.out_interpolation" -> WdlFile("out"), - "file_passing.a.contents" -> WdlString("foo bar baz"), - "file_passing.b.out" -> WdlFile("out"), - "file_passing.b.out_interpolation" -> WdlFile("out"), - "file_passing.b.contents" -> WdlString("foo bar baz") + "file_passing_a_out" -> WdlFile("out"), + "file_passing_a_out_interpolation" -> WdlFile("out"), + "file_passing_a_contents" -> WdlString("foo bar baz"), + "file_passing_b_out" -> WdlFile("out"), + "file_passing_b_out_interpolation" -> WdlFile("out"), + "file_passing_b_contents" -> WdlString("foo bar baz") ), patienceConfig = PatienceConfig(2.minutes.dilated) ) diff --git a/engine/src/test/scala/cromwell/MapWorkflowSpec.scala b/engine/src/test/scala/cromwell/MapWorkflowSpec.scala index 9a00c115e..f13b022c5 100644 --- a/engine/src/test/scala/cromwell/MapWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/MapWorkflowSpec.scala @@ -3,17 +3,17 @@ package cromwell import akka.testkit._ import better.files._ import cromwell.util.SampleWdl -import wdl4s.NamespaceWithWorkflow 
+import wdl4s.{ImportResolver, WdlNamespaceWithWorkflow} import wdl4s.expression.{NoFunctions, WdlFunctions} import wdl4s.types.{WdlFileType, WdlIntegerType, WdlMapType, WdlStringType} import wdl4s.values._ import scala.util.{Success, Try} -class MapWorkflowSpec extends CromwellTestkitSpec { +class MapWorkflowSpec extends CromwellTestKitSpec { private val pwd = File(".") private val sampleWdl = SampleWdl.MapLiteral(pwd.path) - val ns = NamespaceWithWorkflow.load(sampleWdl.wdlSource("")) + val ns = WdlNamespaceWithWorkflow.load(sampleWdl.wdlSource(""), Seq.empty[ImportResolver]) val expectedMap = WdlMap(WdlMapType(WdlFileType, WdlStringType), Map( WdlFile("f1") -> WdlString("alice"), WdlFile("f2") -> WdlString("bob"), @@ -28,12 +28,12 @@ class MapWorkflowSpec extends CromwellTestkitSpec { sampleWdl = sampleWdl, EventFilter.info(pattern = "Starting calls: wf.read_map:NA:1, wf.write_map:NA:1", occurrences = 1), expectedOutputs = Map( - "wf.read_map.out_map" -> WdlMap(WdlMapType(WdlStringType, WdlIntegerType), Map( + "wf_read_map_out_map" -> WdlMap(WdlMapType(WdlStringType, WdlIntegerType), Map( WdlString("x") -> WdlInteger(500), WdlString("y") -> WdlInteger(600), WdlString("z") -> WdlInteger(700) )), - "wf.write_map.contents" -> WdlString("f1\talice\nf2\tbob\nf3\tchuck") + "wf_write_map_contents" -> WdlString("f1\talice\nf2\tbob\nf3\tchuck") ) ) sampleWdl.cleanup() @@ -42,7 +42,7 @@ class MapWorkflowSpec extends CromwellTestkitSpec { "A static Map[File, String] declaration" should { "be a valid declaration" in { - val declaration = ns.workflow.declarations.find {_.name == "map"}.getOrElse { + val declaration = ns.workflow.declarations.find {_.unqualifiedName == "map"}.getOrElse { fail("Expected declaration 'map' to be found") } val expression = declaration.expression.getOrElse { @@ -64,7 +64,7 @@ class MapWorkflowSpec extends CromwellTestkitSpec { case _ => throw new UnsupportedOperationException("Only write_map should be called") } } - val command = 
writeMapTask.instantiateCommand(Map("file_to_name" -> expectedMap), new CannedFunctions).getOrElse { + val command = writeMapTask.instantiateCommand(writeMapTask.inputsFromMap(Map("file_to_name" -> expectedMap)), new CannedFunctions).getOrElse { fail("Expected instantiation to work") } command shouldEqual "cat /test/map/path" @@ -75,7 +75,7 @@ class MapWorkflowSpec extends CromwellTestkitSpec { sampleWdl, eventFilter = EventFilter.info(pattern = "Starting calls: wf.read_map:NA:1, wf.write_map:NA:1", occurrences = 1), expectedOutputs = Map( - "wf.read_map.out_map" -> WdlMap(WdlMapType(WdlStringType, WdlIntegerType), Map( + "wf_read_map_out_map" -> WdlMap(WdlMapType(WdlStringType, WdlIntegerType), Map( WdlString("x") -> WdlInteger(500), WdlString("y") -> WdlInteger(600), WdlString("z") -> WdlInteger(700) diff --git a/engine/src/test/scala/cromwell/MetadataWatchActor.scala b/engine/src/test/scala/cromwell/MetadataWatchActor.scala index 691c4efc5..c0c294442 100644 --- a/engine/src/test/scala/cromwell/MetadataWatchActor.scala +++ b/engine/src/test/scala/cromwell/MetadataWatchActor.scala @@ -1,7 +1,7 @@ package cromwell import akka.actor.{Actor, Props} -import cromwell.services.metadata.{MetadataEvent, MetadataJobKey, MetadataString} +import cromwell.services.metadata.{MetadataEvent, MetadataJobKey, MetadataString, MetadataValue} import cromwell.services.metadata.MetadataService.PutMetadataAction import MetadataWatchActor._ @@ -32,26 +32,36 @@ object MetadataWatchActor { trait Matcher { def matches(events: Traversable[MetadataEvent]): Boolean + private var _nearMisses: List[String] = List.empty + protected def addNearMissInfo(miss: String) = _nearMisses :+= miss + def nearMissInformation = _nearMisses + + def checkMetadataValueContains(key: String, actual: MetadataValue, expected: String): Boolean = { + val result = actual.value.contains(expected) + if (!result) addNearMissInfo(s"Key $key had unexpected value.\nActual value: ${actual.value}\n\nDid not contain: 
$expected") + result + } } def metadataKeyAttemptChecker(attempt: Int): Option[MetadataJobKey] => Boolean = { case Some(jobKey) => jobKey.attempt == attempt case None => false } + final case class JobKeyMetadataKeyAndValueContainStringMatcher(jobKeyCheck: Option[MetadataJobKey] => Boolean, key: String, value: String) extends Matcher { def matches(events: Traversable[MetadataEvent]): Boolean = { - events.exists(e => e.key.key.contains(key) && jobKeyCheck(e.key.jobKey) && e.value.exists { v => v.valueType == MetadataString && v.value.contains(value) }) + events.exists(e => e.key.key.contains(key) && jobKeyCheck(e.key.jobKey) && e.value.exists { v => v.valueType == MetadataString && checkMetadataValueContains(e.key.key, v, value) }) } } abstract class KeyMatchesRegexAndValueContainsStringMatcher(keyTemplate: String, value: String) extends Matcher { val templateRegex = keyTemplate.r def matches(events: Traversable[MetadataEvent]): Boolean = { - events.exists(e => templateRegex.findFirstIn(e.key.key).isDefined && e.value.exists { v => v.value.contains(value) }) + events.exists(e => templateRegex.findFirstIn(e.key.key).isDefined && + e.value.exists { v => checkMetadataValueContains(e.key.key, v, value) }) } } val failurePattern = """failures\[\d*\].message""" - final case class FailureMatcher(value: String) extends KeyMatchesRegexAndValueContainsStringMatcher(failurePattern, value) { - } + final case class FailureMatcher(value: String) extends KeyMatchesRegexAndValueContainsStringMatcher(failurePattern, value) { } } diff --git a/engine/src/test/scala/cromwell/MultipleFilesWithSameNameWorkflowSpec.scala b/engine/src/test/scala/cromwell/MultipleFilesWithSameNameWorkflowSpec.scala index f0b7a70af..4fc24c56e 100644 --- a/engine/src/test/scala/cromwell/MultipleFilesWithSameNameWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/MultipleFilesWithSameNameWorkflowSpec.scala @@ -5,15 +5,15 @@ import cromwell.util.SampleWdl import wdl4s.values.WdlString -class 
MultipleFilesWithSameNameWorkflowSpec extends CromwellTestkitSpec { +class MultipleFilesWithSameNameWorkflowSpec extends CromwellTestKitSpec { "A workflow with two file inputs that have the same name" should { "not clobber one file with the contents of another" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.FileClobber, EventFilter.info(pattern = "Starting calls: two.x:NA:1, two.y:NA:1", occurrences = 1), expectedOutputs = Map( - "two.x.out" -> WdlString("first file.txt"), - "two.y.out" -> WdlString("second file.txt") + "two_x_out" -> WdlString("first file.txt"), + "two_y_out" -> WdlString("second file.txt") ) ) } diff --git a/engine/src/test/scala/cromwell/OptionalParamWorkflowSpec.scala b/engine/src/test/scala/cromwell/OptionalParamWorkflowSpec.scala index 919008315..73347b794 100644 --- a/engine/src/test/scala/cromwell/OptionalParamWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/OptionalParamWorkflowSpec.scala @@ -22,20 +22,20 @@ class OptionalParamWorkflowSpec extends Matchers with WordSpecLike { | call find |} """.stripMargin - val ns = WdlNamespace.load(wf) + val ns = WdlNamespace.loadUsingSource(wf, None, None) val findTask = ns.findTask("find") getOrElse { fail("Expected to find task 'find'") } - val instantiateWithoutValue = findTask.instantiateCommand(Map("root" -> WdlFile("src")), NoFunctions) getOrElse { + val instantiateWithoutValue = findTask.instantiateCommand(findTask.inputsFromMap(Map("find.root" -> WdlFile("src"))), NoFunctions) getOrElse { fail("Expected instantiation to work") } instantiateWithoutValue shouldEqual "find src" - val instantiateWithValue = findTask.instantiateCommand(Map( - "root" -> WdlFile("src"), - "pattern" -> WdlString("*.java") - ), NoFunctions).getOrElse {fail("Expected instantiation to work")} + val instantiateWithValue = findTask.instantiateCommand(findTask.inputsFromMap(Map( + "find.root" -> WdlFile("src"), + "find.pattern" -> WdlString("*.java") + )), NoFunctions).getOrElse {fail("Expected instantiation to 
work")} instantiateWithValue shouldEqual "find src -name *.java" } } diff --git a/engine/src/test/scala/cromwell/PostfixQuantifierWorkflowSpec.scala b/engine/src/test/scala/cromwell/PostfixQuantifierWorkflowSpec.scala index 8530dd6d2..c72d98758 100644 --- a/engine/src/test/scala/cromwell/PostfixQuantifierWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/PostfixQuantifierWorkflowSpec.scala @@ -5,27 +5,27 @@ import wdl4s.values.WdlString import cromwell.util.SampleWdl -class PostfixQuantifierWorkflowSpec extends CromwellTestkitSpec { +class PostfixQuantifierWorkflowSpec extends CromwellTestKitSpec { "A task which contains a parameter with a zero-or-more postfix quantifier" should { "accept an array of size 3" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.ZeroOrMorePostfixQuantifierWorkflowWithArrayInput, EventFilter.info(pattern = "Starting calls: postfix.hello", occurrences = 1), - expectedOutputs = Map("postfix.hello.greeting" -> WdlString("hello alice,bob,charles")) + expectedOutputs = Map("postfix_hello_greeting" -> WdlString("hello alice,bob,charles")) ) } "accept an array of size 1" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.ZeroOrMorePostfixQuantifierWorkflowWithOneElementArrayInput, EventFilter.info(pattern = "Starting calls: postfix.hello", occurrences = 1), - expectedOutputs = Map("postfix.hello.greeting" -> WdlString("hello alice")) + expectedOutputs = Map("postfix_hello_greeting" -> WdlString("hello alice")) ) } "accept an array of size 0" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.ZeroOrMorePostfixQuantifierWorkflowWithZeroElementArrayInput, EventFilter.info(pattern = "Starting calls: postfix.hello", occurrences = 1), - expectedOutputs = Map("postfix.hello.greeting" -> WdlString("hello")) + expectedOutputs = Map("postfix_hello_greeting" -> WdlString("hello")) ) } } @@ -35,14 +35,14 @@ class PostfixQuantifierWorkflowSpec extends CromwellTestkitSpec { runWdlAndAssertOutputs( sampleWdl = 
SampleWdl.OneOrMorePostfixQuantifierWorkflowWithArrayInput, EventFilter.info(pattern = "Starting calls: postfix.hello", occurrences = 1), - expectedOutputs = Map("postfix.hello.greeting" -> WdlString("hello alice,bob,charles")) + expectedOutputs = Map("postfix_hello_greeting" -> WdlString("hello alice,bob,charles")) ) } "accept a scalar for the value" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.OneOrMorePostfixQuantifierWorkflowWithScalarInput, EventFilter.info(pattern = "Starting calls: postfix.hello", occurrences = 1), - expectedOutputs = Map("postfix.hello.greeting" -> WdlString("hello alice")) + expectedOutputs = Map("postfix_hello_greeting" -> WdlString("hello alice")) ) } } diff --git a/engine/src/test/scala/cromwell/RestartWorkflowSpec.scala b/engine/src/test/scala/cromwell/RestartWorkflowSpec.scala index 6b706fbc6..5db1f3ebe 100644 --- a/engine/src/test/scala/cromwell/RestartWorkflowSpec.scala +++ b/engine/src/test/scala/cromwell/RestartWorkflowSpec.scala @@ -6,11 +6,11 @@ import cromwell.core.Tags._ import cromwell.core._ import cromwell.engine.workflow.WorkflowDescriptorBuilder -class RestartWorkflowSpec extends CromwellTestkitSpec with WorkflowDescriptorBuilder { +class RestartWorkflowSpec extends CromwellTestKitSpec with WorkflowDescriptorBuilder { - val actorSystem = ActorSystem("RestartWorkflowSpec", ConfigFactory.parseString(CromwellTestkitSpec.ConfigText)) + val actorSystem = ActorSystem("RestartWorkflowSpec", ConfigFactory.parseString(CromwellTestKitSpec.ConfigText)) //val localBackend = new OldStyleLocalBackend(CromwellTestkitSpec.DefaultLocalBackendConfigEntry, actorSystem) - val sources = WorkflowSourceFiles( + val sources = WorkflowSourceFilesWithoutImports( wdlSource="""task a {command{}} |workflow w { | call a diff --git a/engine/src/test/scala/cromwell/ScatterWorkflowSpec.scala b/engine/src/test/scala/cromwell/ScatterWorkflowSpec.scala index 0d8847a27..c1ca1ad9e 100644 --- a/engine/src/test/scala/cromwell/ScatterWorkflowSpec.scala +++ 
b/engine/src/test/scala/cromwell/ScatterWorkflowSpec.scala @@ -6,15 +6,15 @@ import wdl4s.types.{WdlArrayType, WdlFileType, WdlIntegerType, WdlStringType} import wdl4s.values.{WdlArray, WdlFile, WdlInteger, WdlString} import cromwell.util.SampleWdl -class ScatterWorkflowSpec extends CromwellTestkitSpec { +class ScatterWorkflowSpec extends CromwellTestKitSpec { "A workflow with a stand-alone scatter block in it" should { "run properly" in { runWdlAndAssertOutputs( sampleWdl = SampleWdl.SimpleScatterWdl, eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "scatter0.outside_scatter.out" -> WdlInteger(8000), - "scatter0.inside_scatter.out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(1, 2, 3, 4, 5).map(WdlInteger(_))) + "scatter0_outside_scatter_out" -> WdlInteger(8000), + "scatter0_inside_scatter_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(1, 2, 3, 4, 5).map(WdlInteger(_))) ) ) } @@ -25,11 +25,11 @@ class ScatterWorkflowSpec extends CromwellTestkitSpec { sampleWdl = new SampleWdl.ScatterWdl, eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "w.E.E_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(9, 9, 9, 9, 9, 9).map(WdlInteger(_))), - "w.C.C_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(400, 500, 600, 800, 600, 500).map(WdlInteger(_))), - "w.A.A_out" -> WdlArray(WdlArrayType(WdlStringType), Seq("jeff", "chris", "miguel", "thibault", "khalid", "scott").map(WdlString)), - "w.D.D_out" -> WdlInteger(34), - "w.B.B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))) + "w_E_E_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(9, 9, 9, 9, 9, 9).map(WdlInteger(_))), + "w_C_C_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(400, 500, 600, 800, 600, 500).map(WdlInteger(_))), + "w_A_A_out" -> WdlArray(WdlArrayType(WdlStringType), Seq("jeff", "chris", "miguel", "thibault", "khalid", "ruchi").map(WdlString)), + 
"w_D_D_out" -> WdlInteger(34), + "w_B_B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))) ) ) } @@ -40,12 +40,12 @@ class ScatterWorkflowSpec extends CromwellTestkitSpec { sampleWdl = SampleWdl.SiblingsScatterWdl, eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "w.E.E_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(9, 9, 9, 9, 9, 9).map(WdlInteger(_))), - "w.F.B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))), - "w.C.C_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(400, 500, 600, 800, 600, 500).map(WdlInteger(_))), - "w.A.A_out" -> WdlArray(WdlArrayType(WdlStringType), Seq("jeff", "chris", "miguel", "thibault", "khalid", "scott").map(WdlString)), - "w.D.D_out" -> WdlInteger(34), - "w.B.B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))) + "w_E_E_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(9, 9, 9, 9, 9, 9).map(WdlInteger(_))), + "w_F_B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))), + "w_C_C_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(400, 500, 600, 800, 600, 500).map(WdlInteger(_))), + "w_A_A_out" -> WdlArray(WdlArrayType(WdlStringType), Seq("jeff", "chris", "miguel", "thibault", "khalid", "ruchi").map(WdlString)), + "w_D_D_out" -> WdlInteger(34), + "w_B_B_out" -> WdlArray(WdlArrayType(WdlIntegerType), Seq(4, 5, 6, 8, 6, 5).map(WdlInteger(_))) ) ) } @@ -57,9 +57,9 @@ class ScatterWorkflowSpec extends CromwellTestkitSpec { sampleWdl = SampleWdl.PrepareScatterGatherWdl(), eventFilter = EventFilter.info(pattern = "Workflow complete", occurrences = 1), expectedOutputs = Map( - "sc_test.do_gather.sum" -> WdlInteger(11), - "sc_test.do_prepare.split_files" -> WdlArray(WdlArrayType(WdlFileType), Seq("temp_aa", "temp_ab", "temp_ac", "temp_ad").map(WdlFile(_))), - "sc_test.do_scatter.count_file" -> WdlArray(WdlArrayType(WdlFileType), (1 to 
4).map(_ => WdlFile("output.txt"))) + "sc_test_do_gather_sum" -> WdlInteger(11), + "sc_test_do_prepare_split_files" -> WdlArray(WdlArrayType(WdlFileType), Seq("temp_aa", "temp_ab", "temp_ac", "temp_ad").map(WdlFile(_))), + "sc_test_do_scatter_count_file" -> WdlArray(WdlArrayType(WdlFileType), (1 to 4).map(_ => WdlFile("output.txt"))) ) ) } @@ -74,9 +74,9 @@ class ScatterWorkflowSpec extends CromwellTestkitSpec { |} """.stripMargin, expectedOutputs = Map( - "sc_test.do_gather.sum" -> WdlInteger(11), - "sc_test.do_prepare.split_files" -> WdlArray(WdlArrayType(WdlFileType), Seq("temp_aa", "temp_ab", "temp_ac", "temp_ad").map(WdlFile(_))), - "sc_test.do_scatter.count_file" -> WdlArray(WdlArrayType(WdlFileType), (1 to 4).map(_ => WdlFile("output.txt"))) + "sc_test_do_gather_sum" -> WdlInteger(11), + "sc_test_do_prepare_split_files" -> WdlArray(WdlArrayType(WdlFileType), Seq("temp_aa", "temp_ab", "temp_ac", "temp_ad").map(WdlFile(_))), + "sc_test_do_scatter_count_file" -> WdlArray(WdlArrayType(WdlFileType), (1 to 4).map(_ => WdlFile("output.txt"))) ) ) } diff --git a/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala b/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala index f327c6da4..3859368d7 100644 --- a/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala +++ b/engine/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala @@ -7,7 +7,7 @@ import akka.testkit._ import com.typesafe.config.ConfigFactory import cromwell.MetadataWatchActor.{FailureMatcher, Matcher} import cromwell.SimpleWorkflowActorSpec._ -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import cromwell.core.{WorkflowId, WorkflowSourceFilesWithoutImports} import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.WorkflowActor import cromwell.engine.workflow.WorkflowActor._ @@ -28,13 +28,13 @@ object SimpleWorkflowActorSpec { promise: Promise[Unit]) } -class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { +class 
SimpleWorkflowActorSpec extends CromwellTestKitSpec with BeforeAndAfter { private def buildWorkflowActor(sampleWdl: SampleWdl, rawInputsOverride: String, workflowId: WorkflowId, matchers: Matcher*): TestableWorkflowActorAndMetadataPromise = { - val workflowSources = WorkflowSourceFiles(sampleWdl.wdlSource(), rawInputsOverride, "{}") + val workflowSources = WorkflowSourceFilesWithoutImports(sampleWdl.wdlSource(), rawInputsOverride, "{}") val promise = Promise[Unit]() val watchActor = system.actorOf(MetadataWatchActor.props(promise, matchers: _*), s"service-registry-$workflowId-${UUID.randomUUID()}") val supervisor = TestProbe() @@ -43,9 +43,11 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { serviceRegistryActor = watchActor, workflowLogCopyRouter = system.actorOf(Props.empty, s"workflow-copy-log-router-$workflowId-${UUID.randomUUID()}"), jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props), + subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props), callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props), jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props), - backendSingletonCollection = BackendSingletonCollection(Map("Local" -> None))), + backendSingletonCollection = BackendSingletonCollection(Map("Local" -> None)), + serverMode = true), supervisor = supervisor.ref, name = s"workflow-actor-$workflowId" ) @@ -64,7 +66,7 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, _) = buildWorkflowActor(SampleWdl.HelloWorld, SampleWdl.HelloWorld.wdlJson, workflowId) val probe = TestProbe() probe watch workflowActor - startingCallsFilter("hello.hello") { + startingCallsFilter("wf_hello.hello") { workflowActor ! 
StartWorkflowCommand } @@ -75,7 +77,7 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } "fail to construct with missing inputs" in { - val expectedError = "Required workflow input 'hello.hello.addressee' not specified." + val expectedError = "Required workflow input 'wf_hello.hello.addressee' not specified." val failureMatcher = FailureMatcher(expectedError) val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.HelloWorld, "{}", workflowId, failureMatcher) val probe = TestProbe() @@ -92,7 +94,7 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } "fail to construct with inputs of the wrong type" in { - val expectedError = "Could not coerce value for 'hello.hello.addressee' into: WdlStringType" + val expectedError = "Could not coerce JsNumber value for 'wf_hello.hello.addressee' (3) into: WdlStringType" val failureMatcher = FailureMatcher(expectedError) val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.HelloWorld, s""" { "$Addressee" : 3} """, workflowId, failureMatcher) @@ -100,7 +102,13 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { val probe = TestProbe() probe watch workflowActor workflowActor ! StartWorkflowCommand - Await.result(promise.future, TestExecutionTimeout) + try { + Await.result(promise.future, TestExecutionTimeout) + } catch { + case e: Throwable => + val info = failureMatcher.nearMissInformation + fail(s"We didn't see the expected error message $expectedError within $TestExecutionTimeout. 
${info.mkString(", ")}") + } probe.expectTerminated(workflowActor, AwaitAlmostNothing) supervisor.expectMsgPF(AwaitAlmostNothing, "parent should get a failed response") { case x: WorkflowFailedResponse => @@ -111,12 +119,12 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } "fail when a call fails" in { - val expectedError = "Call goodbye.goodbye: return code was 1" + val expectedError = "Call wf_goodbye.goodbye:NA:1: return code was 1" val failureMatcher = FailureMatcher(expectedError) val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.GoodbyeWorld, SampleWdl.GoodbyeWorld.wdlJson, workflowId, failureMatcher) val probe = TestProbe() probe watch workflowActor - startingCallsFilter("goodbye.goodbye") { + startingCallsFilter("wf_goodbye.goodbye") { workflowActor ! StartWorkflowCommand } Await.result(promise.future, TestExecutionTimeout) @@ -130,7 +138,7 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } "gracefully handle malformed WDL" in { - val expectedError = "Input evaluation for Call test1.summary failedVariable 'Can't find bfile' not found" + val expectedError = "Input evaluation for Call test1.summary failed.\nVariable 'bfile' not found" val failureMatcher = FailureMatcher(expectedError) val TestableWorkflowActorAndMetadataPromise(workflowActor, supervisor, promise) = buildWorkflowActor(SampleWdl.CoercionNotDefined, SampleWdl.CoercionNotDefined.wdlJson, workflowId, failureMatcher) val probe = TestProbe() @@ -148,7 +156,7 @@ class SimpleWorkflowActorSpec extends CromwellTestkitSpec with BeforeAndAfter { } private def startingCallsFilter[T](callNames: String*)(block: => T): T = { - import CromwellTestkitSpec.waitForInfo + import CromwellTestKitSpec.waitForInfo within(TestExecutionTimeout) { waitForInfo(s"Starting calls: ${callNames.mkString("", ":NA:1, ", ":NA:1")}$$", 1) { block diff --git 
a/engine/src/test/scala/cromwell/WdlFunctionsAtWorkflowLevelSpec.scala b/engine/src/test/scala/cromwell/WdlFunctionsAtWorkflowLevelSpec.scala index 72c618fca..3052e3370 100644 --- a/engine/src/test/scala/cromwell/WdlFunctionsAtWorkflowLevelSpec.scala +++ b/engine/src/test/scala/cromwell/WdlFunctionsAtWorkflowLevelSpec.scala @@ -6,7 +6,7 @@ import wdl4s.types.{WdlMapType, WdlStringType} import wdl4s.values.{WdlMap, WdlString} -class WdlFunctionsAtWorkflowLevelSpec extends CromwellTestkitSpec { +class WdlFunctionsAtWorkflowLevelSpec extends CromwellTestKitSpec { val outputMap = WdlMap(WdlMapType(WdlStringType, WdlStringType), Map( WdlString("k1") -> WdlString("v1"), WdlString("k2") -> WdlString("v2"), @@ -19,8 +19,8 @@ class WdlFunctionsAtWorkflowLevelSpec extends CromwellTestkitSpec { sampleWdl = SampleWdl.WdlFunctionsAtWorkflowLevel, eventFilter = EventFilter.info(pattern = "Starting calls: w.a", occurrences = 1), expectedOutputs = Map( - "w.a.x" -> WdlString("one two three four five"), - "w.a.y" -> outputMap + "w_a_x" -> WdlString("one two three four five"), + "w_a_y" -> outputMap ) ) } diff --git a/engine/src/test/scala/cromwell/WorkflowFailSlowSpec.scala b/engine/src/test/scala/cromwell/WorkflowFailSlowSpec.scala index 1cd7a7ef3..2093cf198 100644 --- a/engine/src/test/scala/cromwell/WorkflowFailSlowSpec.scala +++ b/engine/src/test/scala/cromwell/WorkflowFailSlowSpec.scala @@ -5,7 +5,7 @@ import cromwell.util.SampleWdl // TODO: These tests are (and were) somewhat unsatisfactory. They'd be much better if we use TestFSMRefs and TestProbes to simulate job completions against the WorkflowActor and make sure it only completes the workflow at the appropriate time. 
-class WorkflowFailSlowSpec extends CromwellTestkitSpec { +class WorkflowFailSlowSpec extends CromwellTestKitSpec { val FailFastOptions = """ |{ diff --git a/engine/src/test/scala/cromwell/WorkflowOutputsSpec.scala b/engine/src/test/scala/cromwell/WorkflowOutputsSpec.scala index 18df31795..0f210fd1e 100644 --- a/engine/src/test/scala/cromwell/WorkflowOutputsSpec.scala +++ b/engine/src/test/scala/cromwell/WorkflowOutputsSpec.scala @@ -2,10 +2,10 @@ package cromwell import akka.testkit._ import cromwell.util.SampleWdl -import cromwell.CromwellTestkitSpec.AnyValueIsFine +import cromwell.CromwellTestKitSpec.AnyValueIsFine -class WorkflowOutputsSpec extends CromwellTestkitSpec { +class WorkflowOutputsSpec extends CromwellTestKitSpec { "Workflow outputs" should { "use all outputs if none are specified" in { runWdlAndAssertOutputs( @@ -13,9 +13,9 @@ class WorkflowOutputsSpec extends CromwellTestkitSpec { eventFilter = EventFilter.info(pattern = s"is in a terminal state: WorkflowSucceededState", occurrences = 1), runtime = "", expectedOutputs = Map( - "three_step.ps.procs" -> AnyValueIsFine, - "three_step.cgrep.count" -> AnyValueIsFine, - "three_step.wc.count" -> AnyValueIsFine + "three_step_ps_procs" -> AnyValueIsFine, + "three_step_cgrep_count" -> AnyValueIsFine, + "three_step_wc_count" -> AnyValueIsFine ), allowOtherOutputs = false ) @@ -27,8 +27,8 @@ class WorkflowOutputsSpec extends CromwellTestkitSpec { eventFilter = EventFilter.info(pattern = s"is in a terminal state: WorkflowSucceededState", occurrences = 1), runtime = "", expectedOutputs = Map( - "three_step.cgrep.count" -> AnyValueIsFine, - "three_step.wc.count" -> AnyValueIsFine + "three_step_cgrep_count" -> AnyValueIsFine, + "three_step_wc_count" -> AnyValueIsFine ), allowOtherOutputs = false ) @@ -40,8 +40,8 @@ class WorkflowOutputsSpec extends CromwellTestkitSpec { eventFilter = EventFilter.info(pattern = s"is in a terminal state: WorkflowSucceededState", occurrences = 1), runtime = "", expectedOutputs = Map( 
- "scatter0.outside_scatter.out" -> AnyValueIsFine, - "scatter0.inside_scatter.out" -> AnyValueIsFine + "scatter0_outside_scatter_out" -> AnyValueIsFine, + "scatter0_inside_scatter_out" -> AnyValueIsFine ), allowOtherOutputs = false ) @@ -53,7 +53,7 @@ class WorkflowOutputsSpec extends CromwellTestkitSpec { eventFilter = EventFilter.info(pattern = s"is in a terminal state: WorkflowSucceededState", occurrences = 1), runtime = "", expectedOutputs = Map( - "scatter0.inside_scatter.out" -> AnyValueIsFine + "scatter0_inside_scatter_out" -> AnyValueIsFine ), allowOtherOutputs = false ) diff --git a/engine/src/test/scala/cromwell/engine/EngineFunctionsSpec.scala b/engine/src/test/scala/cromwell/engine/EngineFunctionsSpec.scala index ec512107e..14ccbd6bd 100644 --- a/engine/src/test/scala/cromwell/engine/EngineFunctionsSpec.scala +++ b/engine/src/test/scala/cromwell/engine/EngineFunctionsSpec.scala @@ -1,21 +1,23 @@ package cromwell.engine -import java.nio.file.{FileSystem, FileSystems, Path} +import java.nio.file.Path -import cromwell.backend.wdl.{PureFunctions, ReadLikeFunctions, WriteFunctions} +import cromwell.backend.wdl.{ReadLikeFunctions, WriteFunctions} +import cromwell.core.path.{DefaultPathBuilder, PathBuilder} import org.scalatest.prop.TableDrivenPropertyChecks._ import org.scalatest.prop.Tables.Table import org.scalatest.{FlatSpec, Matchers} -import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} +import wdl4s.expression.{NoFunctions, PureStandardLibraryFunctionsLike, WdlStandardLibraryFunctions} import wdl4s.values.{WdlFile, WdlInteger, WdlString, WdlValue} import scala.util.{Failure, Success, Try} class EngineFunctionsSpec extends FlatSpec with Matchers { - trait WdlStandardLibraryImpl extends WdlStandardLibraryFunctions with ReadLikeFunctions with WriteFunctions with PureFunctions { + trait WdlStandardLibraryImpl extends WdlStandardLibraryFunctions with ReadLikeFunctions with WriteFunctions with PureStandardLibraryFunctionsLike { private def 
fail(name: String) = Failure(new NotImplementedError(s"$name() not implemented yet")) + override def writeTempFile(path: String, prefix: String, suffix: String, content: String): String = super[WriteFunctions].writeTempFile(path, prefix, suffix, content) override def stdout(params: Seq[Try[WdlValue]]): Try[WdlFile] = fail("stdout") override def stderr(params: Seq[Try[WdlValue]]): Try[WdlFile] = fail("stderr") } @@ -38,7 +40,7 @@ class EngineFunctionsSpec extends FlatSpec with Matchers { "sub" should "replace a string according to a pattern" in { class TestEngineFn extends WdlStandardLibraryImpl { override def glob(path: String, pattern: String): Seq[String] = ??? - override def fileSystems: List[FileSystem] = List(FileSystems.getDefault) + override def pathBuilders: List[PathBuilder] = List(DefaultPathBuilder) override def writeDirectory: Path = ??? } diff --git a/engine/src/test/scala/cromwell/engine/WorkflowAbortSpec.scala b/engine/src/test/scala/cromwell/engine/WorkflowAbortSpec.scala index 6a4b0a077..37ed641c8 100644 --- a/engine/src/test/scala/cromwell/engine/WorkflowAbortSpec.scala +++ b/engine/src/test/scala/cromwell/engine/WorkflowAbortSpec.scala @@ -1,8 +1,8 @@ package cromwell.engine -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec -class WorkflowAbortSpec extends CromwellTestkitSpec { +class WorkflowAbortSpec extends CromwellTestKitSpec { // TODO: When re-enabled, this test also needs to check that child processes have actually been stopped. 
"A WorkflowManagerActor" should { diff --git a/engine/src/test/scala/cromwell/engine/WorkflowManagerActorSpec.scala b/engine/src/test/scala/cromwell/engine/WorkflowManagerActorSpec.scala index 55faea29f..1d7d313b6 100644 --- a/engine/src/test/scala/cromwell/engine/WorkflowManagerActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/WorkflowManagerActorSpec.scala @@ -1,11 +1,11 @@ package cromwell.engine -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec import cromwell.engine.workflow.WorkflowDescriptorBuilder import cromwell.util.SampleWdl -class WorkflowManagerActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuilder { +class WorkflowManagerActorSpec extends CromwellTestKitSpec with WorkflowDescriptorBuilder { override implicit val actorSystem = system "A WorkflowManagerActor" should { @@ -13,7 +13,7 @@ class WorkflowManagerActorSpec extends CromwellTestkitSpec with WorkflowDescript "run workflows in the correct directory" in { val outputs = runWdl(sampleWdl = SampleWdl.CurrentDirectory) - val outputName = "whereami.whereami.pwd" + val outputName = "wf_whereami_whereami_pwd" val salutation = outputs(outputName) val actualOutput = salutation.valueString.trim actualOutput should endWith("/call-whereami/execution") diff --git a/engine/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala b/engine/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala index 18460e765..d6719fe85 100644 --- a/engine/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/WorkflowStoreActorSpec.scala @@ -1,17 +1,21 @@ package cromwell.engine import cats.data.NonEmptyList -import cromwell.CromwellTestkitSpec -import cromwell.core.WorkflowId +import cromwell.CromwellTestKitSpec +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.workflow.workflowstore.WorkflowStoreActor._ import cromwell.engine.workflow.workflowstore._ +import 
cromwell.services.metadata.MetadataQuery +import cromwell.services.metadata.MetadataService.{GetMetadataQueryAction, MetadataLookupResponse} +import cromwell.services.metadata.impl.ReadMetadataActor +import cromwell.util.EncryptionSpec import cromwell.util.SampleWdl.HelloWorld import org.scalatest.Matchers import scala.concurrent.duration._ import scala.language.postfixOps -class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { +class WorkflowStoreActorSpec extends CromwellTestKitSpec with Matchers { val helloWorldSourceFiles = HelloWorld.asWorkflowSources() /** @@ -31,17 +35,22 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { list.foldLeft((List.empty[WorkflowToStart], true))(folderFunction)._2 } + private def prettyOptions(workflowSourceFiles: WorkflowSourceFilesCollection): WorkflowSourceFilesCollection = { + import spray.json._ + workflowSourceFiles.copyOptions(workflowSourceFiles.workflowOptionsJson.parseJson.prettyPrint) + } + "The WorkflowStoreActor" should { "return an ID for a submitted workflow" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) storeActor ! SubmitWorkflow(helloWorldSourceFiles) expectMsgType[WorkflowSubmittedToStore](10 seconds) } "return 3 IDs for a batch submission of 3" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) storeActor ! 
BatchSubmitWorkflows(NonEmptyList.of(helloWorldSourceFiles, helloWorldSourceFiles, helloWorldSourceFiles)) expectMsgPF(10 seconds) { case WorkflowsBatchSubmittedToStore(ids) => ids.toList.size shouldBe 3 @@ -50,7 +59,7 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { "fetch exactly N workflows" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) storeActor ! BatchSubmitWorkflows(NonEmptyList.of(helloWorldSourceFiles, helloWorldSourceFiles, helloWorldSourceFiles)) val insertedIds = expectMsgType[WorkflowsBatchSubmittedToStore](10 seconds).workflowIds.toList @@ -63,15 +72,65 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { workflowNel map { case WorkflowToStart(id, sources, state) => insertedIds.contains(id) shouldBe true - sources shouldBe helloWorldSourceFiles + sources shouldBe prettyOptions(helloWorldSourceFiles) state shouldBe WorkflowStoreState.Submitted } } } + "fetch encrypted and cleared workflow options" in { + EncryptionSpec.assumeAes256Cbc() + + val optionedSourceFiles = HelloWorld.asWorkflowSources(workflowOptions = + s"""|{ + | "key": "value", + | "refresh_token": "it's a secret" + |} + |""".stripMargin) + + + val store = new InMemoryWorkflowStore + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) + val readMetadataActor = system.actorOf(ReadMetadataActor.props()) + storeActor ! BatchSubmitWorkflows(NonEmptyList.of(optionedSourceFiles)) + val insertedIds = expectMsgType[WorkflowsBatchSubmittedToStore](10 seconds).workflowIds.toList + + storeActor ! 
FetchRunnableWorkflows(1) + expectMsgPF(10 seconds) { + case NewWorkflowsToStart(workflowNel) => + workflowNel.toList.size should be(1) + checkDistinctIds(workflowNel.toList) should be(true) + workflowNel.toList.foreach { + case WorkflowToStart(id, sources, state) => + insertedIds.contains(id) should be(true) + sources.wdlSource should be(optionedSourceFiles.wdlSource) + sources.inputsJson should be(optionedSourceFiles.inputsJson) + state should be(WorkflowStoreState.Submitted) + + import spray.json._ + + val encryptedJsObject = sources.workflowOptionsJson.parseJson.asJsObject + encryptedJsObject.fields.keys should contain theSameElementsAs Seq("key", "refresh_token") + encryptedJsObject.fields("key") should be(JsString("value")) + encryptedJsObject.fields("refresh_token").asJsObject.fields.keys should contain theSameElementsAs + Seq("iv", "ciphertext") + + readMetadataActor ! GetMetadataQueryAction(MetadataQuery.forWorkflow(id)) + expectMsgPF(10 seconds) { + case MetadataLookupResponse(_, eventList) => + val optionsEvent = eventList.find(_.key.key == "submittedFiles:options").get + val clearedJsObject = optionsEvent.value.get.value.parseJson.asJsObject + clearedJsObject.fields.keys should contain theSameElementsAs Seq("key", "refresh_token") + clearedJsObject.fields("key") should be(JsString("value")) + clearedJsObject.fields("refresh_token") should be(JsString("cleared")) + } + } + } + } + "return only the remaining workflows if N is larger than size" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) storeActor ! 
BatchSubmitWorkflows(NonEmptyList.of(helloWorldSourceFiles, helloWorldSourceFiles, helloWorldSourceFiles)) val insertedIds = expectMsgType[WorkflowsBatchSubmittedToStore](10 seconds).workflowIds.toList @@ -84,7 +143,7 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { workflowNel map { case WorkflowToStart(id, sources, state) => insertedIds.contains(id) shouldBe true - sources shouldBe helloWorldSourceFiles + sources shouldBe prettyOptions(helloWorldSourceFiles) state shouldBe WorkflowStoreState.Submitted } } @@ -92,7 +151,7 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { "remove workflows which exist" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) storeActor ! SubmitWorkflow(helloWorldSourceFiles) val id = expectMsgType[WorkflowSubmittedToStore](10 seconds).workflowId storeActor ! RemoveWorkflow(id) @@ -105,7 +164,7 @@ class WorkflowStoreActorSpec extends CromwellTestkitSpec with Matchers { "remain responsive if you ask to remove a workflow it doesn't have" in { val store = new InMemoryWorkflowStore - val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestkitSpec.ServiceRegistryActorInstance)) + val storeActor = system.actorOf(WorkflowStoreActor.props(store, CromwellTestKitSpec.ServiceRegistryActorInstance)) val id = WorkflowId.randomId() storeActor ! 
RemoveWorkflow(id) diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala b/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala index f98fa17bd..0763a1c30 100644 --- a/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/DefaultBackendJobExecutionActor.scala @@ -1,9 +1,9 @@ package cromwell.engine.backend.mock import akka.actor.{ActorRef, Props} -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobSucceededResponse} import cromwell.backend._ -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} import scala.concurrent.Future @@ -14,7 +14,7 @@ object DefaultBackendJobExecutionActor { case class DefaultBackendJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, override val configurationDescriptor: BackendConfigurationDescriptor) extends BackendJobExecutionActor { override def execute: Future[BackendJobExecutionResponse] = { - Future.successful(SucceededResponse(jobDescriptor.key, Some(0), (jobDescriptor.call.task.outputs map taskOutputToJobOutput).toMap, None, Seq.empty)) + Future.successful(JobSucceededResponse(jobDescriptor.key, Some(0), (jobDescriptor.call.task.outputs map taskOutputToJobOutput).toMap, None, Seq.empty)) } override def recover = execute @@ -25,7 +25,7 @@ case class DefaultBackendJobExecutionActor(override val jobDescriptor: BackendJo class DefaultBackendLifecycleActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = None override 
def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendJobExecutionActor.scala b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendJobExecutionActor.scala index 60617f468..eaaa04abb 100644 --- a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendJobExecutionActor.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendJobExecutionActor.scala @@ -2,7 +2,7 @@ package cromwell.engine.backend.mock import akka.actor.Props import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobExecutionActor} -import cromwell.backend.BackendJobExecutionActor.{FailedRetryableResponse, BackendJobExecutionResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{JobFailedRetryableResponse, BackendJobExecutionResponse, JobSucceededResponse} import scala.concurrent.Future @@ -16,9 +16,9 @@ case class RetryableBackendJobExecutionActor(override val jobDescriptor: Backend override def execute: Future[BackendJobExecutionResponse] = { if (jobDescriptor.key.attempt < attempts) - Future.successful(FailedRetryableResponse(jobDescriptor.key, new RuntimeException("An apparent transient Exception!"), None)) + Future.successful(JobFailedRetryableResponse(jobDescriptor.key, new RuntimeException("An apparent transient Exception!"), None)) else - Future.successful(SucceededResponse(jobDescriptor.key, Some(0), (jobDescriptor.call.task.outputs map taskOutputToJobOutput).toMap, None, Seq.empty)) + Future.successful(JobSucceededResponse(jobDescriptor.key, Some(0), (jobDescriptor.call.task.outputs map taskOutputToJobOutput).toMap, None, Seq.empty)) } override def recover = execute diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala index 46f28f447..c88481610 100644 
--- a/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/RetryableBackendLifecycleActorFactory.scala @@ -2,13 +2,13 @@ package cromwell.engine.backend.mock import akka.actor.{ActorRef, Props} import cromwell.backend._ -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} class RetryableBackendLifecycleActorFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor) extends BackendLifecycleActorFactory { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = None override def jobExecutionActorProps(jobDescriptor: BackendJobDescriptor, diff --git a/engine/src/test/scala/cromwell/engine/backend/mock/package.scala b/engine/src/test/scala/cromwell/engine/backend/mock/package.scala index 4baeb9c33..a2f914121 100644 --- a/engine/src/test/scala/cromwell/engine/backend/mock/package.scala +++ b/engine/src/test/scala/cromwell/engine/backend/mock/package.scala @@ -9,7 +9,7 @@ package object mock { // This is used by stubbed backends that are to be used in tests to prepare dummy outputs for job def taskOutputToJobOutput(taskOutput: TaskOutput) = - taskOutput.name -> JobOutput(sampleValue(taskOutput.wdlType)) + taskOutput.unqualifiedName -> JobOutput(sampleValue(taskOutput.wdlType)) private def sampleValue(wdlType: WdlType): WdlValue = wdlType match { case WdlIntegerType => WdlInteger(3) diff --git a/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala index 540da9863..6dec787d5 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/SingleWorkflowRunnerActorSpec.scala @@ 
-6,10 +6,11 @@ import java.time.OffsetDateTime import akka.actor._ import akka.pattern.ask import akka.testkit.TestKit +import akka.util.Timeout import better.files._ import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestkitSpec._ -import cromwell.core.WorkflowSourceFiles +import cromwell.CromwellTestKitSpec._ +import cromwell.core.{WorkflowSourceFilesCollection} import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.SingleWorkflowRunnerActor.RunWorkflow import cromwell.engine.workflow.SingleWorkflowRunnerActorSpec._ @@ -17,7 +18,7 @@ import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor import cromwell.engine.workflow.workflowstore.{InMemoryWorkflowStore, WorkflowStoreActor} import cromwell.util.SampleWdl import cromwell.util.SampleWdl.{ExpressionsInInputs, GoodbyeWorld, ThreeStep} -import cromwell.{AlwaysHappyJobStoreActor, CromwellTestkitSpec, EmptyCallCacheReadActor} +import cromwell.{AlwaysHappyJobStoreActor, AlwaysHappySubWorkflowStoreActor, CromwellTestKitSpec, EmptyCallCacheReadActor} import org.scalatest.prop.{TableDrivenPropertyChecks, TableFor3} import spray.json._ @@ -45,29 +46,34 @@ object SingleWorkflowRunnerActorSpec { def toFields = jsValue.get.asJsObject.fields } - class TestSingleWorkflowRunnerActor(source: WorkflowSourceFiles, + class TestSingleWorkflowRunnerActor(source: WorkflowSourceFilesCollection, metadataOutputPath: Option[Path]) extends SingleWorkflowRunnerActor(source, metadataOutputPath) { - override lazy val serviceRegistryActor = CromwellTestkitSpec.ServiceRegistryActorInstance + override lazy val serviceRegistryActor = CromwellTestKitSpec.ServiceRegistryActorInstance } } -abstract class SingleWorkflowRunnerActorSpec extends CromwellTestkitSpec { +abstract class SingleWorkflowRunnerActorSpec extends CromwellTestKitSpec { private val workflowStore = system.actorOf(WorkflowStoreActor.props(new InMemoryWorkflowStore, dummyServiceRegistryActor)) private val jobStore = 
system.actorOf(AlwaysHappyJobStoreActor.props) + private val subWorkflowStore = system.actorOf(AlwaysHappySubWorkflowStoreActor.props) private val callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props) private val jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props) def workflowManagerActor(): ActorRef = { - system.actorOf(Props(new WorkflowManagerActor(ConfigFactory.load(), + val params = WorkflowManagerActorParams(ConfigFactory.load(), workflowStore, dummyServiceRegistryActor, dummyLogCopyRouter, jobStore, + subWorkflowStore, callCacheReadActor, jobTokenDispenserActor, - BackendSingletonCollection(Map.empty))), "WorkflowManagerActor") + BackendSingletonCollection(Map.empty), + abortJobsOnTerminate = false, + serverMode = false) + system.actorOf(Props(new WorkflowManagerActor(params)), "WorkflowManagerActor") } def createRunnerActor(sampleWdl: SampleWdl = ThreeStep, managerActor: => ActorRef = workflowManagerActor(), @@ -78,7 +84,7 @@ abstract class SingleWorkflowRunnerActorSpec extends CromwellTestkitSpec { def singleWorkflowActor(sampleWdl: SampleWdl = ThreeStep, managerActor: => ActorRef = workflowManagerActor(), outputFile: => Option[Path] = None): Unit = { val actorRef = createRunnerActor(sampleWdl, managerActor, outputFile) - val futureResult = actorRef ? 
RunWorkflow + val futureResult = actorRef.ask(RunWorkflow)(timeout = new Timeout(TimeoutDuration)) Await.ready(futureResult, Duration.Inf) () } @@ -111,9 +117,8 @@ class SingleWorkflowRunnerActorWithMetadataSpec extends SingleWorkflowRunnerActo singleWorkflowActor( sampleWdl = wdlFile, outputFile = Option(metadataFile.path)) + TestKit.shutdownActorSystem(system, TimeoutDuration) } - TestKit.shutdownActorSystem(system, TimeoutDuration) - val metadataFileContent = metadataFile.contentAsString val metadata = metadataFileContent.parseJson.asJsObject.fields metadata.get("id") shouldNot be(empty) @@ -200,7 +205,7 @@ class SingleWorkflowRunnerActorWithMetadataOnFailureSpec extends SingleWorkflowR val calls = metadata.get("calls").toFields calls should not be empty - val callSeq = calls("goodbye.goodbye").asInstanceOf[JsArray].elements + val callSeq = calls("wf_goodbye.goodbye").asInstanceOf[JsArray].elements callSeq should have size 1 val call = callSeq.head.asJsObject.fields val inputs = call.get("inputs").toFields diff --git a/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala index b98c5657f..1301f0e9b 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala @@ -3,8 +3,8 @@ package cromwell.engine.workflow import akka.actor.{Actor, ActorRef} import akka.testkit.{TestActorRef, TestFSMRef, TestProbe} import com.typesafe.config.{Config, ConfigFactory} -import cromwell.backend.AllBackendInitializationData -import cromwell.core.{ExecutionStore, OutputStore, WorkflowId, WorkflowSourceFiles} +import cromwell.backend.{AllBackendInitializationData, JobExecutionMap} +import cromwell.core._ import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.backend.BackendSingletonCollection import cromwell.engine.workflow.WorkflowActor._ @@ -13,13 +13,13 @@ import 
cromwell.engine.workflow.lifecycle.WorkflowFinalizationActor.{StartFinali import cromwell.engine.workflow.lifecycle.WorkflowInitializationActor.{WorkflowInitializationAbortedResponse, WorkflowInitializationFailedResponse} import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{WorkflowExecutionAbortedResponse, WorkflowExecutionFailedResponse, WorkflowExecutionSucceededResponse} import cromwell.util.SampleWdl.ThreeStep -import cromwell.{AlwaysHappyJobStoreActor, CromwellTestkitSpec, EmptyCallCacheReadActor} +import cromwell.{AlwaysHappyJobStoreActor, AlwaysHappySubWorkflowStoreActor, CromwellTestKitSpec, EmptyCallCacheReadActor} import org.scalatest.BeforeAndAfter import org.scalatest.concurrent.Eventually import scala.concurrent.duration._ -class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuilder with BeforeAndAfter with Eventually { +class WorkflowActorSpec extends CromwellTestKitSpec with WorkflowDescriptorBuilder with BeforeAndAfter with Eventually { override implicit val actorSystem = system val mockServiceRegistryActor = TestActorRef(new Actor { @@ -53,6 +53,7 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild serviceRegistryActor = mockServiceRegistryActor, workflowLogCopyRouter = TestProbe().ref, jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props), + subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props), callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props), jobTokenDispenserActor = TestProbe().ref ), @@ -62,7 +63,7 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild actor } - implicit val TimeoutDuration = CromwellTestkitSpec.TimeoutDuration + implicit val TimeoutDuration = CromwellTestKitSpec.TimeoutDuration "WorkflowActor" should { @@ -95,7 +96,7 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild "run Finalization if Execution fails" in { val actor = 
createWorkflowActor(ExecutingWorkflowState) deathwatch watch actor - actor ! WorkflowExecutionFailedResponse(ExecutionStore.empty, OutputStore.empty, Seq(new Exception("Execution Failed"))) + actor ! WorkflowExecutionFailedResponse(Map.empty, new Exception("Execution Failed")) finalizationProbe.expectMsg(StartFinalizationCommand) actor.stateName should be(FinalizingWorkflowState) actor ! WorkflowFinalizationSucceededResponse @@ -108,9 +109,9 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild deathwatch watch actor actor ! AbortWorkflowCommand eventually { actor.stateName should be(WorkflowAbortingState) } - currentLifecycleActor.expectMsgPF(CromwellTestkitSpec.TimeoutDuration) { + currentLifecycleActor.expectMsgPF(CromwellTestKitSpec.TimeoutDuration) { case EngineLifecycleActorAbortCommand => - actor ! WorkflowExecutionAbortedResponse(ExecutionStore.empty, OutputStore.empty) + actor ! WorkflowExecutionAbortedResponse(Map.empty) } finalizationProbe.expectMsg(StartFinalizationCommand) actor.stateName should be(FinalizingWorkflowState) @@ -122,7 +123,7 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild "run Finalization actor if Execution succeeds" in { val actor = createWorkflowActor(ExecutingWorkflowState) deathwatch watch actor - actor ! WorkflowExecutionSucceededResponse(ExecutionStore.empty, OutputStore.empty) + actor ! WorkflowExecutionSucceededResponse(Map.empty, Map.empty) finalizationProbe.expectMsg(StartFinalizationCommand) actor.stateName should be(FinalizingWorkflowState) actor ! 
WorkflowFinalizationSucceededResponse @@ -151,13 +152,14 @@ class WorkflowActorSpec extends CromwellTestkitSpec with WorkflowDescriptorBuild class MockWorkflowActor(val finalizationProbe: TestProbe, workflowId: WorkflowId, startMode: StartMode, - workflowSources: WorkflowSourceFiles, + workflowSources: WorkflowSourceFilesCollection, conf: Config, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, jobStoreActor: ActorRef, + subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, - jobTokenDispenserActor: ActorRef) extends WorkflowActor(workflowId, startMode, workflowSources, conf, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, callCacheReadActor, jobTokenDispenserActor, BackendSingletonCollection(Map.empty)) { + jobTokenDispenserActor: ActorRef) extends WorkflowActor(workflowId, startMode, workflowSources, conf, serviceRegistryActor, workflowLogCopyRouter, jobStoreActor, subWorkflowStoreActor, callCacheReadActor, jobTokenDispenserActor, BackendSingletonCollection(Map.empty), serverMode = true) { - override def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, executionStore: ExecutionStore, outputStore: OutputStore) = finalizationProbe.ref + override def makeFinalizationActor(workflowDescriptor: EngineWorkflowDescriptor, jobExecutionMap: JobExecutionMap, worfklowOutputs: CallOutputs) = finalizationProbe.ref } diff --git a/engine/src/test/scala/cromwell/engine/workflow/WorkflowDescriptorBuilder.scala b/engine/src/test/scala/cromwell/engine/workflow/WorkflowDescriptorBuilder.scala index 73013e712..e4f5e1350 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/WorkflowDescriptorBuilder.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/WorkflowDescriptorBuilder.scala @@ -2,8 +2,8 @@ package cromwell.engine.workflow import akka.actor.{ActorSystem, Props} import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestkitSpec -import cromwell.core.{WorkflowId, WorkflowSourceFiles} +import 
cromwell.CromwellTestKitSpec +import cromwell.core.{WorkflowId, WorkflowSourceFilesCollection} import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{MaterializeWorkflowDescriptorCommand, MaterializeWorkflowDescriptorFailureResponse, MaterializeWorkflowDescriptorSuccessResponse, WorkflowDescriptorMaterializationResult} @@ -12,16 +12,16 @@ import scala.concurrent.Await trait WorkflowDescriptorBuilder { - implicit val awaitTimeout = CromwellTestkitSpec.TimeoutDuration + implicit val awaitTimeout = CromwellTestKitSpec.TimeoutDuration implicit val actorSystem: ActorSystem - def createMaterializedEngineWorkflowDescriptor(id: WorkflowId, workflowSources: WorkflowSourceFiles): EngineWorkflowDescriptor = { + def createMaterializedEngineWorkflowDescriptor(id: WorkflowId, workflowSources: WorkflowSourceFilesCollection): EngineWorkflowDescriptor = { import akka.pattern.ask implicit val timeout = akka.util.Timeout(awaitTimeout) implicit val ec = actorSystem.dispatcher val serviceRegistryIgnorer = actorSystem.actorOf(Props.empty) - val actor = actorSystem.actorOf(MaterializeWorkflowDescriptorActor.props(serviceRegistryIgnorer, id), "MaterializeWorkflowDescriptorActor-" + id.id) + val actor = actorSystem.actorOf(MaterializeWorkflowDescriptorActor.props(serviceRegistryIgnorer, id, importLocalFilesystem = false), "MaterializeWorkflowDescriptorActor-" + id.id) val workflowDescriptorFuture = actor.ask( MaterializeWorkflowDescriptorCommand(workflowSources, ConfigFactory.load) ).mapTo[WorkflowDescriptorMaterializationResult] diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala index 970bc7156..0715ebeed 100644 --- 
a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/MaterializeWorkflowDescriptorActorSpec.scala @@ -3,8 +3,8 @@ package cromwell.engine.workflow.lifecycle import akka.actor.Props import akka.testkit.TestDuration import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestkitSpec -import cromwell.core.{WorkflowId, WorkflowOptions, WorkflowSourceFiles} +import cromwell.CromwellTestKitSpec +import cromwell.core.{WorkflowId, WorkflowOptions, WorkflowSourceFilesWithoutImports} import cromwell.engine.backend.{BackendConfigurationEntry, CromwellBackends} import cromwell.engine.workflow.lifecycle.MaterializeWorkflowDescriptorActor.{MaterializeWorkflowDescriptorCommand, MaterializeWorkflowDescriptorFailureResponse, MaterializeWorkflowDescriptorSuccessResponse} import cromwell.util.SampleWdl.HelloWorld @@ -16,7 +16,7 @@ import wdl4s.values.{WdlInteger, WdlString} import scala.concurrent.duration._ -class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with BeforeAndAfter with MockitoSugar { +class MaterializeWorkflowDescriptorActorSpec extends CromwellTestKitSpec with BeforeAndAfter with MockitoSugar { val workflowId = WorkflowId.randomId() val minimumConf = ConfigFactory.parseString( @@ -54,18 +54,18 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be "MaterializeWorkflowDescriptorActor" should { "accept valid WDL, inputs and options files" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdlSourceNoDocker, validInputsJson, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdlSourceNoDocker, validInputsJson, validOptionsFile) 
materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => wfDesc.id shouldBe workflowId - wfDesc.name shouldBe "hello" + wfDesc.name shouldBe "wf_hello" wfDesc.namespace.tasks.size shouldBe 1 - wfDesc.workflowInputs.head shouldBe (("hello.hello.addressee", WdlString("world"))) - wfDesc.backendDescriptor.inputs.head shouldBe (("hello.hello.addressee", WdlString("world"))) + wfDesc.workflowInputs.head shouldBe (("wf_hello.hello.addressee", WdlString("world"))) + wfDesc.backendDescriptor.inputs.head shouldBe (("wf_hello.hello.addressee", WdlString("world"))) wfDesc.getWorkflowOption(WorkflowOptions.WriteToCache) shouldBe Option("true") wfDesc.getWorkflowOption(WorkflowOptions.ReadFromCache) shouldBe None // Default backend assignment is "Local": @@ -73,7 +73,7 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be case (call, assignment) if call.task.name.equals("hello") => assignment shouldBe "Local" case (call, assignment) => fail(s"Unexpected call: ${call.task.name}") } - wfDesc.engineFilesystems.size shouldBe 1 + wfDesc.pathBuilders.size shouldBe 1 case MaterializeWorkflowDescriptorFailureResponse(reason) => fail(s"Materialization failed with $reason") case unknown => fail(s"Unexpected materialization response: $unknown") @@ -99,8 +99,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be |{ "foo.i": "17" } """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdl, inputs, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdl, inputs, validOptionsFile) materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -149,8 +149,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be | } |} """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdl, "{}", defaultDocker) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdl, "{}", defaultDocker) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -196,14 +196,14 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be val cromwellBackends = CromwellBackends(fauxBackendEntries) // Run the test: - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, cromwellBackends)) - val sources = WorkflowSourceFiles(wdl, "{}", "{}") + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, cromwellBackends, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdl, "{}", "{}") materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(sources, differentDefaultBackendConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => - wfDesc.namespace.workflow.calls foreach { + wfDesc.namespace.workflow.taskCalls foreach { case call if call.task.name.equals("a") => wfDesc.backendAssignments(call) shouldBe "SpecifiedBackend" case call if call.task.name.equals("b") => @@ -231,8 +231,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be |} """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdl, "{}", "{}") + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdl, "{}", "{}") materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, differentDefaultBackendConf) within(Timeout) { @@ -250,8 +250,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be } "reject an invalid WDL source" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(unstructuredFile, validInputsJson, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(unstructuredFile, validInputsJson, validOptionsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -274,8 +274,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be | |# no workflow foo { ... } block!! 
""".stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(noWorkflowWdl, validInputsJson, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(noWorkflowWdl, validInputsJson, validOptionsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -299,8 +299,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be | |workflow foo { } """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val badWdlSources = WorkflowSourceFiles(noWorkflowWdl, validInputsJson, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val badWdlSources = WorkflowSourceFilesWithoutImports(noWorkflowWdl, validInputsJson, validOptionsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(badWdlSources, minimumConf) within(Timeout) { @@ -318,8 +318,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be "reject an invalid options file" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdlSourceNoDocker, validInputsJson, unstructuredFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdlSourceNoDocker, validInputsJson, unstructuredFile) materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -336,8 +336,8 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be } "reject an invalid workflow inputs file" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdlSourceNoDocker, unstructuredFile, validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdlSourceNoDocker, unstructuredFile, validOptionsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { @@ -354,15 +354,15 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be } "reject requests if any required inputs are missing" in { - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) val noInputsJson = "{}" - val badOptionsSources = WorkflowSourceFiles(wdlSourceNoDocker, noInputsJson, validOptionsFile) + val badOptionsSources = WorkflowSourceFilesWithoutImports(wdlSourceNoDocker, noInputsJson, validOptionsFile) materializeWfActor ! 
MaterializeWorkflowDescriptorCommand(badOptionsSources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorFailureResponse(reason) => - reason.getMessage should startWith("Workflow input processing failed.\nRequired workflow input 'hello.hello.addressee' not specified") + reason.getMessage should startWith("Workflow input processing failed.\nRequired workflow input 'wf_hello.hello.addressee' not specified") case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => fail("This materialization should not have succeeded!") case unknown => fail(s"Unexpected materialization response: $unknown") @@ -381,14 +381,14 @@ class MaterializeWorkflowDescriptorActorSpec extends CromwellTestkitSpec with Be | call bar |} """.stripMargin - val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId)) - val sources = WorkflowSourceFiles(wdl, "{}", validOptionsFile) + val materializeWfActor = system.actorOf(MaterializeWorkflowDescriptorActor.props(NoBehaviourActor, workflowId, importLocalFilesystem = false)) + val sources = WorkflowSourceFilesWithoutImports(wdl, "{}", validOptionsFile) materializeWfActor ! MaterializeWorkflowDescriptorCommand(sources, minimumConf) within(Timeout) { expectMsgPF() { case MaterializeWorkflowDescriptorFailureResponse(reason) => - reason.getMessage should startWith("Workflow input processing failed.\nInvalid right-side type of 'foo.j'. 
Expecting Int, got String") + reason.getMessage should startWith("Workflow input processing failed.\nUnable to load namespace from workflow: ERROR: Value for j is not coerceable into a Int") case MaterializeWorkflowDescriptorSuccessResponse(wfDesc) => fail("This materialization should not have succeeded!") case unknown => fail(s"Unexpected materialization response: $unknown") } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala new file mode 100644 index 000000000..1f61772e6 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/SubWorkflowExecutionActorSpec.scala @@ -0,0 +1,213 @@ +package cromwell.engine.workflow.lifecycle.execution + +import java.util.UUID + +import akka.actor.Props +import akka.testkit.{TestFSMRef, TestProbe} +import cromwell.backend.{AllBackendInitializationData, BackendWorkflowDescriptor, JobExecutionMap} +import cromwell.core._ +import cromwell.core.callcaching.CallCachingOff +import cromwell.database.sql.tables.SubWorkflowStoreEntry +import cromwell.engine.backend.BackendSingletonCollection +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor.{CallPreparationFailed, SubWorkflowPreparationSucceeded} +import cromwell.engine.workflow.lifecycle.execution.SubWorkflowExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ +import cromwell.engine.{ContinueWhilePossible, EngineWorkflowDescriptor} +import cromwell.subworkflowstore.SubWorkflowStoreActor.{QuerySubWorkflow, SubWorkflowFound, SubWorkflowNotFound} +import org.scalatest.concurrent.Eventually +import org.scalatest.{FlatSpecLike, Matchers} +import org.specs2.mock.Mockito +import wdl4s.{WdlNamespaceWithWorkflow, Workflow, WorkflowCall} + +import scala.concurrent.duration._ +import scala.language.postfixOps + +class 
SubWorkflowExecutionActorSpec extends TestKitSuite with FlatSpecLike with Matchers with Mockito with Eventually { + + behavior of "SubWorkflowExecutionActor" + + val serviceRegistryProbe = TestProbe() + val jobStoreProbe = TestProbe() + val subWorkflowStoreProbe = TestProbe() + val callCacheReadActorProbe = TestProbe() + val jobTokenDispenserProbe = TestProbe() + val preparationActor = TestProbe() + val subWorkflowActor = TestProbe() + val deathWatch = TestProbe() + val parentProbe = TestProbe() + val parentBackendDescriptor = mock[BackendWorkflowDescriptor] + val parentWorkflowId: WorkflowId = WorkflowId.randomId() + parentBackendDescriptor.id returns parentWorkflowId + val parentWorkflowDescriptor = EngineWorkflowDescriptor( + mock[WdlNamespaceWithWorkflow], + parentBackendDescriptor, + Map.empty, + Map.empty, + ContinueWhilePossible, + List.empty, + CallCachingOff + ) + val subWorkflow = mock[Workflow] + subWorkflow.unqualifiedName returns "sub_wf" + val subWorkflowCall = mock[WorkflowCall] + subWorkflowCall.fullyQualifiedName returns "foo.bar" + subWorkflowCall.callable returns subWorkflow + val subKey = SubWorkflowKey(subWorkflowCall, None, 1) + + val awaitTimeout: FiniteDuration = 10 seconds + + def buildEWEA(restart: Boolean = false) = { + new TestFSMRef[SubWorkflowExecutionActorState, SubWorkflowExecutionActorData, SubWorkflowExecutionActor](system, Props( + new SubWorkflowExecutionActor( + subKey, + WorkflowExecutionActorData.empty(parentWorkflowDescriptor), + Map.empty, + serviceRegistryProbe.ref, + jobStoreProbe.ref, + subWorkflowStoreProbe.ref, + callCacheReadActorProbe.ref, + jobTokenDispenserProbe.ref, + BackendSingletonCollection(Map.empty), + AllBackendInitializationData(Map.empty), + restart + ) { + override def createSubWorkflowPreparationActor(subWorkflowId: WorkflowId) = preparationActor.ref + override def createSubWorkflowActor(createSubWorkflowActor: EngineWorkflowDescriptor) = subWorkflowActor.ref + }), parentProbe.ref, 
s"SubWorkflowExecutionActorSpec-${UUID.randomUUID()}") + } + + it should "Check the sub workflow store when restarting" in { + val ewea = buildEWEA(restart = true) + ewea.setState(SubWorkflowPendingState) + + ewea ! Execute + subWorkflowStoreProbe.expectMsg(QuerySubWorkflow(parentWorkflowId, subKey)) + eventually { + ewea.stateName shouldBe SubWorkflowCheckingStoreState + } + } + + it should "Reuse sub workflow id if found in the store" in { + import cromwell.core.ExecutionIndex._ + + val ewea = buildEWEA(restart = true) + ewea.setState(SubWorkflowCheckingStoreState) + + val subWorkflowUuid = WorkflowId.randomId() + ewea ! SubWorkflowFound(SubWorkflowStoreEntry(Option(0), parentWorkflowId.toString, subKey.scope.fullyQualifiedName, subKey.index.fromIndex, subKey.attempt, subWorkflowUuid.toString, None)) + preparationActor.expectMsg(CallPreparationActor.Start) + parentProbe.expectMsg(JobStarting(subKey)) + + eventually { + ewea.stateName shouldBe SubWorkflowPreparingState + ewea.stateData.subWorkflowId shouldBe Some(subWorkflowUuid) + } + } + + it should "Fall back to a random Id if the sub workflow id is not found in the store" in { + val ewea = buildEWEA(restart = true) + ewea.setState(SubWorkflowCheckingStoreState) + + ewea ! SubWorkflowNotFound(QuerySubWorkflow(parentWorkflowId, subKey)) + preparationActor.expectMsg(CallPreparationActor.Start) + parentProbe.expectMsg(JobStarting(subKey)) + + eventually { + ewea.stateName shouldBe SubWorkflowPreparingState + ewea.stateData.subWorkflowId should not be empty + } + } + + it should "Prepare a sub workflow" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowPendingState) + + ewea ! 
Execute + preparationActor.expectMsg(CallPreparationActor.Start) + parentProbe.expectMsg(JobStarting(subKey)) + eventually { + ewea.stateName shouldBe SubWorkflowPreparingState + } + } + + it should "Run a sub workflow" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowPreparingState, SubWorkflowExecutionActorData(Some(WorkflowId.randomId()))) + + val subWorkflowId = WorkflowId.randomId() + val subBackendDescriptor = mock[BackendWorkflowDescriptor] + subBackendDescriptor.id returns subWorkflowId + val subWorkflowDescriptor = EngineWorkflowDescriptor( + mock[WdlNamespaceWithWorkflow], + subBackendDescriptor, + Map.empty, + Map.empty, + ContinueWhilePossible, + List.empty, + CallCachingOff + ) + + ewea ! SubWorkflowPreparationSucceeded(subWorkflowDescriptor, Map.empty) + subWorkflowActor.expectMsg(WorkflowExecutionActor.ExecuteWorkflowCommand) + parentProbe.expectMsg(JobRunning(subKey, Map.empty, Option(subWorkflowActor.ref))) + eventually { + ewea.stateName shouldBe SubWorkflowRunningState + } + } + + it should "Fail a sub workflow if preparation failed" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowPreparingState) + deathWatch watch ewea + + val subWorkflowKey = mock[SubWorkflowKey] + val throwable: Exception = new Exception("Expected test exception") + val preparationFailedMessage: CallPreparationFailed = CallPreparationFailed(subWorkflowKey, throwable) + ewea ! preparationFailedMessage + parentProbe.expectMsg(SubWorkflowFailedResponse(subKey, Map.empty, throwable)) + deathWatch.expectTerminated(ewea, awaitTimeout) + } + + it should "Relay Workflow Successful message" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowRunningState, SubWorkflowExecutionActorData(Some(WorkflowId.randomId()))) + + deathWatch watch ewea + + val jobExecutionMap: JobExecutionMap = Map.empty + val outputs: CallOutputs = Map.empty[LocallyQualifiedName, JobOutput] + val workflowSuccessfulMessage = WorkflowExecutionSucceededResponse(jobExecutionMap, outputs) + ewea ! 
workflowSuccessfulMessage + parentProbe.expectMsg(SubWorkflowSucceededResponse(subKey, jobExecutionMap, outputs)) + deathWatch.expectTerminated(ewea, awaitTimeout) + } + + it should "Relay Workflow Failed message" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowRunningState, SubWorkflowExecutionActorData(Some(WorkflowId.randomId()))) + + deathWatch watch ewea + + val jobExecutionMap: JobExecutionMap = Map.empty + val expectedException: Exception = new Exception("Expected test exception") + + val workflowFailedMessage = WorkflowExecutionFailedResponse(jobExecutionMap, expectedException) + ewea ! workflowFailedMessage + parentProbe.expectMsg(SubWorkflowFailedResponse(subKey, jobExecutionMap, expectedException)) + deathWatch.expectTerminated(ewea, awaitTimeout) + } + + it should "Relay Workflow Aborted message" in { + val ewea = buildEWEA() + ewea.setState(SubWorkflowRunningState, SubWorkflowExecutionActorData(Some(WorkflowId.randomId()))) + + deathWatch watch ewea + + val jobExecutionMap: JobExecutionMap = Map.empty + val workflowAbortedMessage = WorkflowExecutionAbortedResponse(jobExecutionMap) + ewea ! 
workflowAbortedMessage + parentProbe.expectMsg(SubWorkflowAbortedResponse(subKey, jobExecutionMap)) + deathWatch.expectTerminated(ewea, awaitTimeout) + } + +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala index 01af65d1a..51007a2df 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/WorkflowExecutionActorSpec.scala @@ -12,13 +12,13 @@ import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor import cromwell.services.ServiceRegistryActor import cromwell.services.metadata.MetadataService import cromwell.util.SampleWdl -import cromwell.{AlwaysHappyJobStoreActor, CromwellTestkitSpec, EmptyCallCacheReadActor, MetadataWatchActor} +import cromwell._ import org.scalatest.BeforeAndAfter -import scala.concurrent.{Await, Promise} import scala.concurrent.duration._ +import scala.concurrent.{Await, Promise} -class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter with WorkflowDescriptorBuilder { +class WorkflowExecutionActorSpec extends CromwellTestKitSpec with BeforeAndAfter with WorkflowDescriptorBuilder { override implicit val actorSystem = system implicit val DefaultDuration = 20.seconds.dilated @@ -53,6 +53,7 @@ class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter val metadataWatcherProps = Props(MetadataWatchActor(metadataSuccessPromise, requiredMetadataMatchers: _*)) val serviceRegistryActor = system.actorOf(ServiceRegistryActor.props(ConfigFactory.load(), overrides = Map(MetadataService.MetadataServiceName -> metadataWatcherProps))) val jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props) + val subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props) val jobTokenDispenserActor 
= system.actorOf(JobExecutionTokenDispenserActor.props) val MockBackendConfigEntry = BackendConfigurationEntry( name = "Mock", @@ -66,12 +67,12 @@ class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter val callCacheReadActor = TestProbe() val workflowExecutionActor = system.actorOf( - WorkflowExecutionActor.props(workflowId, engineWorkflowDescriptor, serviceRegistryActor, jobStoreActor, + WorkflowExecutionActor.props(engineWorkflowDescriptor, serviceRegistryActor, jobStoreActor, subWorkflowStoreActor, callCacheReadActor.ref, jobTokenDispenserActor, MockBackendSingletonCollection, AllBackendInitializationData.empty, restarting = false), "WorkflowExecutionActor") EventFilter.info(pattern = ".*Final Outputs", occurrences = 1).intercept { - EventFilter.info(pattern = "Starting calls: hello.hello", occurrences = 3).intercept { + EventFilter.info(pattern = "Starting calls: wf_hello.hello", occurrences = 3).intercept { workflowExecutionActor ! ExecuteWorkflowCommand } } @@ -86,6 +87,7 @@ class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter "execute a workflow with scatters" in { val serviceRegistry = mockServiceRegistryActor val jobStore = system.actorOf(AlwaysHappyJobStoreActor.props) + val subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props) val callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props) val jobTokenDispenserActor = system.actorOf(JobExecutionTokenDispenserActor.props) @@ -99,7 +101,7 @@ class WorkflowExecutionActorSpec extends CromwellTestkitSpec with BeforeAndAfter val workflowId = WorkflowId.randomId() val engineWorkflowDescriptor = createMaterializedEngineWorkflowDescriptor(workflowId, SampleWdl.SimpleScatterWdl.asWorkflowSources(runtime = runtimeSection)) val workflowExecutionActor = system.actorOf( - WorkflowExecutionActor.props(workflowId, engineWorkflowDescriptor, serviceRegistry, jobStore, + WorkflowExecutionActor.props(engineWorkflowDescriptor, 
serviceRegistry, jobStore, subWorkflowStoreActor, callCacheReadActor, jobTokenDispenserActor, MockBackendSingletonCollection, AllBackendInitializationData.empty, restarting = false), "WorkflowExecutionActor") diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala index d79a383fb..34ac835cd 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/callcaching/EngineJobHashingActorSpec.scala @@ -1,23 +1,23 @@ package cromwell.engine.workflow.lifecycle.execution.callcaching import akka.actor.{ActorRef, ActorSystem, Props} -import akka.testkit.{ImplicitSender, TestKit, TestProbe} +import akka.testkit.{ImplicitSender, TestProbe} import cats.data.NonEmptyList -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec +import cromwell.backend._ import cromwell.backend.callcaching.FileHashingActor.{FileHashResponse, SingleFileHashRequest} -import cromwell.backend.{BackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey, BackendWorkflowDescriptor, RuntimeAttributeDefinition} import cromwell.core.callcaching._ import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CacheMiss, CallCacheHashes} import org.scalatest.mockito.MockitoSugar -import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike} +import org.scalatest.{Matchers, WordSpecLike} import wdl4s._ import wdl4s.values.{WdlFile, WdlValue} import scala.concurrent.duration._ import scala.language.postfixOps -class EngineJobHashingActorSpec extends TestKit(new CromwellTestkitSpec.TestWorkflowManagerSystem().actorSystem) - with ImplicitSender with WordSpecLike with Matchers with MockitoSugar with BeforeAndAfterAll { +class 
EngineJobHashingActorSpec extends CromwellTestKitSpec + with ImplicitSender with WordSpecLike with Matchers with MockitoSugar { import EngineJobHashingActorSpec._ @@ -162,13 +162,9 @@ class EngineJobHashingActorSpec extends TestKit(new CromwellTestkitSpec.TestWork } } } - - override def afterAll() = { - TestKit.shutdownActorSystem(system) - } } -object EngineJobHashingActorSpec extends MockitoSugar { +object EngineJobHashingActorSpec extends BackendSpec { import org.mockito.Mockito._ def createEngineJobHashingActor @@ -198,12 +194,12 @@ object EngineJobHashingActorSpec extends MockitoSugar { def templateJobDescriptor(inputs: Map[LocallyQualifiedName, WdlValue] = Map.empty) = { val task = mock[Task] - val call = mock[Call] + val call = mock[TaskCall] when(task.commandTemplateString).thenReturn("Do the stuff... now!!") when(task.outputs).thenReturn(List.empty) when(call.task).thenReturn(task) val workflowDescriptor = mock[BackendWorkflowDescriptor] - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, None, 1), Map.empty, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, None, 1), Map.empty, fqnMapToDeclarationMap(inputs)) jobDescriptor } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala index 7aa4ccfda..b39711759 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaBackendIsCopyingCachedOutputsSpec.scala @@ -3,9 +3,10 @@ package cromwell.engine.workflow.lifecycle.execution.ejea import cats.data.NonEmptyList import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ import EngineJobExecutionActorSpec._ -import 
cromwell.core.callcaching.CallCachingMode +import cromwell.core.callcaching._ import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.{CacheHit, CallCacheHashes, EJHAResponse, HashError} import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCachingEntryId + import scala.util.{Failure, Success, Try} import cromwell.engine.workflow.lifecycle.execution.ejea.HasJobSuccessResponse.SuccessfulCallCacheHashes @@ -97,20 +98,43 @@ class EjeaBackendIsCopyingCachedOutputsSpec extends EngineJobExecutionActorSpec } } + if (mode.readFromCache) { s"invalidate a call for caching if backend coping failed when it was going to receive $hashComboName, if call caching is $mode" in { ejea = ejeaInBackendIsCopyingCachedOutputsState(initialHashData, mode) // Send the response from the copying actor ejea ! failureNonRetryableResponse expectInvalidateCallCacheActor(cacheId) - eventually { ejea.stateName should be(InvalidatingCacheEntry) } - ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper. bjeaProps, initialHashData, cacheHit)) + eventually { + ejea.stateName should be(InvalidatingCacheEntry) + } + ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, initialHashData, cacheHit)) + } + + s"not invalidate a call for caching if backend coping failed when invalidation is disabled, when it was going to receive $hashComboName, if call caching is $mode" in { + val invalidationDisabledOptions = CallCachingOptions(invalidateBadCacheResults = false) + val cacheInvalidationDisabledMode = mode match { + case CallCachingActivity(rw, options) => CallCachingActivity(rw, invalidationDisabledOptions) + case _ => fail(s"Mode $mode not appropriate for cache invalidation tests") + } + ejea = ejeaInBackendIsCopyingCachedOutputsState(initialHashData, cacheInvalidationDisabledMode) + // Send the response from the copying actor + ejea ! 
failureNonRetryableResponse + + eventually { + ejea.stateName should be(RunningJob) + } + // Make sure we didn't start invalidating anything: + helper.invalidateCacheActorCreations.hasExactlyOne should be(false) + ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, initialHashData, None)) } s"invalidate a call for caching if backend coping failed (preserving and received hashes) when call caching is $mode, the EJEA has $hashComboName and then gets a success result" in { ejea = ejeaInBackendIsCopyingCachedOutputsState(initialHashData, mode) // Send the response from the EJHA (if there was one!): - ejhaResponse foreach { ejea ! _ } + ejhaResponse foreach { + ejea ! _ + } // Nothing should happen here: helper.jobStoreProbe.expectNoMsg(awaitAlmostNothing) @@ -120,9 +144,12 @@ class EjeaBackendIsCopyingCachedOutputsSpec extends EngineJobExecutionActorSpec ejea ! failureNonRetryableResponse expectInvalidateCallCacheActor(cacheId) - eventually { ejea.stateName should be(InvalidatingCacheEntry) } - ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper. 
bjeaProps, finalHashData, cacheHit)) + eventually { + ejea.stateName should be(InvalidatingCacheEntry) + } + ejea.stateData should be(ResponsePendingData(helper.backendJobDescriptor, helper.bjeaProps, finalHashData, cacheHit)) } + } } } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingJobStoreSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingJobStoreSpec.scala index a8f2bdf46..5a9bb4f5c 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingJobStoreSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaCheckingJobStoreSpec.scala @@ -1,9 +1,9 @@ package cromwell.engine.workflow.lifecycle.execution.ejea -import cromwell.backend.BackendJobExecutionActor.{FailedNonRetryableResponse, FailedRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{JobFailedNonRetryableResponse, JobFailedRetryableResponse, JobSucceededResponse} import cromwell.core._ +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{CheckingJobStore, NoData, PreparingJob} -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec.EnhancedTestEJEA import cromwell.jobstore.JobStoreActor.{JobComplete, JobNotComplete} import cromwell.jobstore.{JobResultFailure, JobResultSuccess} @@ -17,12 +17,12 @@ class EjeaCheckingJobStoreSpec extends EngineJobExecutionActorSpec { createCheckingJobStoreEjea() ejea.setState(CheckingJobStore) val returnCode: Option[Int] = Option(0) - val jobOutputs: JobOutputs = Map.empty + val jobOutputs: CallOutputs = Map.empty ejea ! 
JobComplete(JobResultSuccess(returnCode, jobOutputs)) helper.replyToProbe.expectMsgPF(awaitTimeout) { - case response: SucceededResponse => + case response: JobSucceededResponse => response.returnCode shouldBe returnCode response.jobOutputs shouldBe jobOutputs } @@ -40,11 +40,11 @@ class EjeaCheckingJobStoreSpec extends EngineJobExecutionActorSpec { ejea ! JobComplete(JobResultFailure(returnCode, reason, retryable)) helper.replyToProbe.expectMsgPF(awaitTimeout) { - case response: FailedNonRetryableResponse => + case response: JobFailedNonRetryableResponse => false should be(retryable) response.returnCode shouldBe returnCode response.throwable shouldBe reason - case response: FailedRetryableResponse => + case response: JobFailedRetryableResponse => true should be(retryable) response.returnCode shouldBe returnCode response.throwable shouldBe reason @@ -59,7 +59,7 @@ class EjeaCheckingJobStoreSpec extends EngineJobExecutionActorSpec { ejea.setState(CheckingJobStore) ejea ! JobNotComplete - helper.jobPreparationProbe.expectMsg(awaitTimeout, "expecting RecoverJobCommand", JobPreparationActor.Start) + helper.jobPreparationProbe.expectMsg(awaitTimeout, "expecting RecoverJobCommand", CallPreparationActor.Start) ejea.stateName should be(PreparingJob) ejea.stop() diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPreparingJobSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPreparingJobSpec.scala index 1d6f2ccbe..bdb426b60 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPreparingJobSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaPreparingJobSpec.scala @@ -1,9 +1,10 @@ package cromwell.engine.workflow.lifecycle.execution.ejea -import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ -import EngineJobExecutionActorSpec._ +import 
cromwell.backend.BackendJobExecutionActor.JobFailedNonRetryableResponse import cromwell.core.callcaching.CallCachingMode -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor.{BackendJobPreparationFailed, BackendJobPreparationSucceeded} +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor.{BackendJobPreparationSucceeded, CallPreparationFailed} +import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ +import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec._ import org.scalatest.concurrent.Eventually class EjeaPreparingJobSpec extends EngineJobExecutionActorSpec with CanExpectHashingInitialization with Eventually { @@ -34,10 +35,11 @@ class EjeaPreparingJobSpec extends EngineJobExecutionActorSpec with CanExpectHas } s"Not proceed if Job Preparation fails ($mode)" in { - val prepFailedResponse = BackendJobPreparationFailed(helper.jobDescriptorKey, new Exception("The goggles! They do nothing!")) + val prepActorResponse = CallPreparationFailed(helper.jobDescriptorKey, new Exception("The goggles! They do nothing!")) + val prepFailedEjeaResponse = JobFailedNonRetryableResponse(helper.jobDescriptorKey, prepActorResponse.throwable, None) ejea = ejeaInPreparingState(mode) - ejea ! prepFailedResponse - helper.replyToProbe.expectMsg(prepFailedResponse) + ejea ! 
prepActorResponse + helper.replyToProbe.expectMsg(prepFailedEjeaResponse) helper.deathwatch.expectTerminated(ejea, awaitTimeout) } } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRequestingExecutionTokenSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRequestingExecutionTokenSpec.scala index 83ae08835..3047de560 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRequestingExecutionTokenSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaRequestingExecutionTokenSpec.scala @@ -1,7 +1,7 @@ package cromwell.engine.workflow.lifecycle.execution.ejea +import cromwell.engine.workflow.lifecycle.execution.CallPreparationActor import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ -import cromwell.engine.workflow.lifecycle.execution.JobPreparationActor import cromwell.engine.workflow.tokens.JobExecutionTokenDispenserActor.{JobExecutionTokenDenied, JobExecutionTokenDispensed} import cromwell.jobstore.JobStoreActor.QueryJobCompletion import org.scalatest.concurrent.Eventually @@ -45,7 +45,7 @@ class EjeaRequestingExecutionTokenSpec extends EngineJobExecutionActorSpec with ejea = helper.buildEJEA(restarting = false) ejea ! 
JobExecutionTokenDispensed(helper.executionToken) - helper.jobPreparationProbe.expectMsg(max = awaitTimeout, hint = "Awaiting job preparation", JobPreparationActor.Start) + helper.jobPreparationProbe.expectMsg(max = awaitTimeout, hint = "Awaiting job preparation", CallPreparationActor.Start) helper.jobStoreProbe.expectNoMsg(awaitAlmostNothing) ejea.stateName should be(PreparingJob) } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaUpdatingJobStoreSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaUpdatingJobStoreSpec.scala index 1b783a69e..73a224a00 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaUpdatingJobStoreSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EjeaUpdatingJobStoreSpec.scala @@ -1,7 +1,7 @@ package cromwell.engine.workflow.lifecycle.execution.ejea import EngineJobExecutionActorSpec._ -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, FailedNonRetryableResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobFailedNonRetryableResponse} import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ import cromwell.jobstore.JobStoreActor.{JobStoreWriteFailure, JobStoreWriteSuccess} import cromwell.engine.workflow.lifecycle.execution.ejea.HasJobSuccessResponse.SuccessfulCallCacheHashes @@ -33,7 +33,7 @@ class EjeaUpdatingJobStoreSpec extends EngineJobExecutionActorSpec with HasJobSu val exception = new Exception("I loved Ophelia: forty thousand brothers\\ Could not, with all their quantity of love,\\ Make up my sum. What wilt thou do for her?") ejea ! 
JobStoreWriteFailure(exception) helper.replyToProbe.expectMsgPF(awaitTimeout) { - case FailedNonRetryableResponse(jobDescriptorKey, reason, None) => + case JobFailedNonRetryableResponse(jobDescriptorKey, reason, None) => jobDescriptorKey should be(helper.jobDescriptorKey) reason.getCause should be(exception) } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpec.scala index cbba49811..105776781 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpec.scala @@ -4,7 +4,7 @@ import akka.actor.Actor import akka.testkit.TestFSMRef import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor._ import cromwell.jobstore.{Pending => _} -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec import cromwell.backend.BackendJobExecutionActor import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionActorCommand import cromwell.core.callcaching._ @@ -15,7 +15,7 @@ import scala.concurrent.duration._ import scala.language.postfixOps -trait EngineJobExecutionActorSpec extends CromwellTestkitSpec +trait EngineJobExecutionActorSpec extends CromwellTestKitSpec with Matchers with Mockito with BeforeAndAfterAll with BeforeAndAfter { // If we WANT something to happen, make sure it happens within this window: diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpecUtil.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpecUtil.scala index 8ef607c5b..b6ddb5601 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpecUtil.scala +++ 
b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/EngineJobExecutionActorSpecUtil.scala @@ -1,6 +1,6 @@ package cromwell.engine.workflow.lifecycle.execution.ejea -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, FailedNonRetryableResponse, FailedRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, JobFailedNonRetryableResponse, JobFailedRetryableResponse, JobSucceededResponse} import cromwell.core.JobOutput import cromwell.core.callcaching._ import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{EJEAData, SucceededResponseData, UpdatingCallCache, UpdatingJobStore} @@ -23,7 +23,7 @@ private[ejea] trait CanValidateJobStoreKey { self: EngineJobExecutionActorSpec = } private[ejea] trait CanExpectCacheWrites extends Eventually { self: EngineJobExecutionActorSpec => - def expectCacheWrite(expectedResponse: SucceededResponse, expectedCallCacheHashes: CallCacheHashes): Unit = { + def expectCacheWrite(expectedResponse: JobSucceededResponse, expectedCallCacheHashes: CallCacheHashes): Unit = { eventually { ejea.stateName should be(UpdatingCallCache) } ejea.stateData should be(SucceededResponseData(expectedResponse, Some(Success(expectedCallCacheHashes)))) helper.callCacheWriteActorCreations match { @@ -83,7 +83,7 @@ private[ejea] trait CanExpectCacheInvalidation extends Eventually { self: Engine private[ejea] trait HasJobSuccessResponse { self: EngineJobExecutionActorSpec => val successRc = Option(171) val successOutputs = Map("a" -> JobOutput(WdlInteger(3)), "b" -> JobOutput(WdlString("bee"))) - def successResponse = SucceededResponse(helper.jobDescriptorKey, successRc, successOutputs, None, Seq.empty) + def successResponse = JobSucceededResponse(helper.jobDescriptorKey, successRc, successOutputs, None, Seq.empty) } private[ejea] object HasJobSuccessResponse { val SuccessfulCallCacheHashes = CallCacheHashes(Set(HashResult(HashKey("whatever you want"), 
HashValue("whatever you need")))) @@ -93,7 +93,7 @@ private[ejea] trait HasJobFailureResponses { self: EngineJobExecutionActorSpec = val failedRc = Option(12) val failureReason = new Exception("The sixth sheik's sheep is sick!") // Need to delay making the response because job descriptors come from the per-test "helper", which is null outside tests! - def failureRetryableResponse = FailedRetryableResponse(helper.jobDescriptorKey, failureReason, failedRc) - def failureNonRetryableResponse = FailedNonRetryableResponse(helper.jobDescriptorKey, failureReason, Option(12)) + def failureRetryableResponse = JobFailedRetryableResponse(helper.jobDescriptorKey, failureReason, failedRc) + def failureNonRetryableResponse = JobFailedNonRetryableResponse(helper.jobDescriptorKey, failureReason, Option(12)) def abortedResponse = AbortedResponse(helper.jobDescriptorKey) } \ No newline at end of file diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala index 16274a275..9389362ad 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/execution/ejea/PerTestHelper.scala @@ -4,29 +4,27 @@ import java.util.UUID import akka.actor.{ActorRef, ActorSystem, Props} import akka.testkit.{TestFSMRef, TestProbe} -import cromwell.backend.BackendJobExecutionActor.SucceededResponse -import cromwell.backend.{BackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey, BackendLifecycleActorFactory, BackendWorkflowDescriptor} +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse +import cromwell.backend._ import cromwell.core.JobExecutionToken.JobExecutionTokenType import cromwell.core.callcaching.{CallCachingActivity, CallCachingMode, CallCachingOff} -import cromwell.core.{ExecutionStore, JobExecutionToken, OutputStore, 
WorkflowId} +import cromwell.core.{CallOutputs, JobExecutionToken, WorkflowId} import cromwell.engine.EngineWorkflowDescriptor -import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCachingEntryId -import cromwell.engine.workflow.lifecycle.execution.{EngineJobExecutionActor, WorkflowExecutionActorData} import cromwell.engine.workflow.lifecycle.execution.EngineJobExecutionActor.{EJEAData, EngineJobExecutionActorState} +import cromwell.engine.workflow.lifecycle.execution.callcaching.CallCachingEntryId import cromwell.engine.workflow.lifecycle.execution.callcaching.EngineJobHashingActor.CallCacheHashes +import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec._ +import cromwell.engine.workflow.lifecycle.execution.{EngineJobExecutionActor, WorkflowExecutionActorData} +import cromwell.engine.workflow.mocks.{DeclarationMock, TaskMock, WdlExpressionMock} +import cromwell.util.AkkaTestUtil._ import org.specs2.mock.Mockito -import wdl4s.WdlExpression.ScopedLookupFunction -import wdl4s.expression.{NoFunctions, WdlFunctions, WdlStandardLibraryFunctions} -import wdl4s.types.{WdlIntegerType, WdlStringType} -import wdl4s.values.{WdlInteger, WdlString, WdlValue} import wdl4s._ -import cromwell.util.AkkaTestUtil._ -import cromwell.engine.workflow.lifecycle.execution.ejea.EngineJobExecutionActorSpec._ - -import scala.util.Success +import wdl4s.expression.{NoFunctions, WdlStandardLibraryFunctions} +import wdl4s.parser.WdlParser.Ast +import wdl4s.types.{WdlIntegerType, WdlStringType} -private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mockito { +private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mockito with TaskMock with WdlExpressionMock with DeclarationMock { val workflowId = WorkflowId.randomId() val workflowName = "wf" @@ -37,34 +35,29 @@ private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mock val executionToken = JobExecutionToken(JobExecutionTokenType("test", 
None), UUID.randomUUID()) - val task = mock[Task] - task.declarations returns Seq.empty - task.runtimeAttributes returns RuntimeAttributes(Map.empty) - task.commandTemplateString returns "!!shazam!!" - val stringOutputExpression = mock[WdlExpression] - stringOutputExpression.valueString returns "hello" - stringOutputExpression.evaluate(any[ScopedLookupFunction], any[ WdlFunctions[WdlValue]]) returns Success(WdlString("hello")) - task.outputs returns Seq(TaskOutput("outString", WdlStringType, stringOutputExpression)) - - val intInputExpression = mock[WdlExpression] - intInputExpression.valueString returns "543" - intInputExpression.evaluate(any[ScopedLookupFunction], any[WdlFunctions[WdlValue]]) returns Success(WdlInteger(543)) - - val intInputDeclaration = mock[Declaration] - intInputDeclaration.name returns "inInt" - intInputDeclaration.expression returns Option(intInputExpression) - intInputDeclaration.wdlType returns WdlIntegerType - task.declarations returns Seq(intInputDeclaration) - - val call: Call = Call(None, jobFqn, task, Set.empty, Map.empty, None) + val task = mockTask( + taskName, + declarations = Seq(mockDeclaration("inInt", WdlIntegerType, mockIntExpression(543))), + outputs = Seq(("outString", WdlStringType, mockStringExpression("hello"))) + ) + + val workflow = new Workflow( + unqualifiedName = workflowName, + workflowOutputWildcards = Seq.empty, + wdlSyntaxErrorFormatter = mock[WdlSyntaxErrorFormatter], + meta = Map.empty, + parameterMeta = Map.empty, + ast = mock[Ast]) + val call: TaskCall = TaskCall(None, task, Map.empty, mock[Ast]) + call.parent_=(workflow) val jobDescriptorKey = BackendJobDescriptorKey(call, jobIndex, jobAttempt) val backendWorkflowDescriptor = BackendWorkflowDescriptor(workflowId, null, null, null) - val backendJobDescriptor = BackendJobDescriptor(backendWorkflowDescriptor, jobDescriptorKey, runtimeAttributes = Map.empty, inputs = Map.empty) + val backendJobDescriptor = BackendJobDescriptor(backendWorkflowDescriptor, 
jobDescriptorKey, runtimeAttributes = Map.empty, inputDeclarations = Map.empty) var fetchCachedResultsActorCreations: ExpectOne[(CallCachingEntryId, Seq[TaskOutput])] = NothingYet var jobHashingInitializations: ExpectOne[(BackendJobDescriptor, CallCachingActivity)] = NothingYet - var callCacheWriteActorCreations: ExpectOne[(CallCacheHashes, SucceededResponse)] = NothingYet + var callCacheWriteActorCreations: ExpectOne[(CallCacheHashes, JobSucceededResponse)] = NothingYet var invalidateCacheActorCreations: ExpectOne[CallCachingEntryId] = NothingYet val deathwatch = TestProbe() @@ -94,12 +87,12 @@ private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mock // These two factory methods should never be called from EJEA or any of its descendants: override def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - executionStore: ExecutionStore, - outputStore: OutputStore, + calls: Set[TaskCall], + jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, initializationData: Option[BackendInitializationData]): Option[Props] = throw new UnsupportedOperationException("Unexpected finalization actor creation!") override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = throw new UnsupportedOperationException("Unexpected finalization actor creation!") } @@ -108,14 +101,14 @@ private[ejea] class PerTestHelper(implicit val system: ActorSystem) extends Mock (implicit startingState: EngineJobExecutionActorState): TestFSMRef[EngineJobExecutionActorState, EJEAData, MockEjea] = { val factory: BackendLifecycleActorFactory = buildFactory() - val descriptor = EngineWorkflowDescriptor(backendWorkflowDescriptor, Map.empty, null, null, null, callCachingMode) + val descriptor = EngineWorkflowDescriptor(mock[WdlNamespaceWithWorkflow], backendWorkflowDescriptor, Map.empty, null, null, null, 
callCachingMode) val myBrandNewEjea = new TestFSMRef[EngineJobExecutionActorState, EJEAData, MockEjea](system, Props(new MockEjea( helper = this, jobPreparationProbe = jobPreparationProbe, replyTo = replyToProbe.ref, jobDescriptorKey = jobDescriptorKey, - executionData = WorkflowExecutionActorData(descriptor, ExecutionStore(Map.empty), Map.empty, OutputStore(Map.empty)), + executionData = WorkflowExecutionActorData.empty(descriptor), factory = factory, initializationData = None, restarting = restarting, @@ -149,9 +142,7 @@ private[ejea] class MockEjea(helper: PerTestHelper, override def makeFetchCachedResultsActor(cacheId: CallCachingEntryId, taskOutputs: Seq[TaskOutput]) = helper.fetchCachedResultsActorCreations = helper.fetchCachedResultsActorCreations.foundOne((cacheId, taskOutputs)) override def initializeJobHashing(jobDescriptor: BackendJobDescriptor, activity: CallCachingActivity) = helper.jobHashingInitializations = helper.jobHashingInitializations.foundOne((jobDescriptor, activity)) - override def createSaveCacheResultsActor(hashes: CallCacheHashes, success: SucceededResponse) = helper.callCacheWriteActorCreations = helper.callCacheWriteActorCreations.foundOne((hashes, success)) - override def invalidateCacheHit(cacheId: CallCachingEntryId): Unit = { - helper.invalidateCacheActorCreations = helper.invalidateCacheActorCreations.foundOne(cacheId) - } + override def createSaveCacheResultsActor(hashes: CallCacheHashes, success: JobSucceededResponse) = helper.callCacheWriteActorCreations = helper.callCacheWriteActorCreations.foundOne((hashes, success)) + override def invalidateCacheHit(cacheId: CallCachingEntryId): Unit = { helper.invalidateCacheActorCreations = helper.invalidateCacheActorCreations.foundOne(cacheId) } override def createJobPreparationActor(jobPrepProps: Props, name: String) = jobPreparationProbe.ref } diff --git a/engine/src/test/scala/cromwell/engine/workflow/mocks/DeclarationMock.scala 
b/engine/src/test/scala/cromwell/engine/workflow/mocks/DeclarationMock.scala new file mode 100644 index 000000000..471d00777 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/mocks/DeclarationMock.scala @@ -0,0 +1,21 @@ +package cromwell.engine.workflow.mocks + +import org.specs2.mock.Mockito +import wdl4s.{Declaration, WdlExpression} +import wdl4s.types.WdlType + +object DeclarationMock { + type DeclarationMockType = (String, WdlType, WdlExpression) +} + +trait DeclarationMock extends Mockito { + def mockDeclaration(name: String, + wdlType: WdlType, + expression: WdlExpression) = { + val declaration = mock[Declaration] + declaration.unqualifiedName returns name + declaration.expression returns Option(expression) + declaration.wdlType returns wdlType + declaration + } +} diff --git a/engine/src/test/scala/cromwell/engine/workflow/mocks/TaskMock.scala b/engine/src/test/scala/cromwell/engine/workflow/mocks/TaskMock.scala new file mode 100644 index 000000000..4d8ef9c1d --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/mocks/TaskMock.scala @@ -0,0 +1,27 @@ +package cromwell.engine.workflow.mocks + +import cromwell.engine.workflow.mocks.DeclarationMock.DeclarationMockType +import org.specs2.mock.Mockito +import wdl4s._ +import wdl4s.parser.WdlParser.Ast + +trait TaskMock extends Mockito { + + def mockTask(name: String, + declarations: Seq[Declaration] = Seq.empty, + runtimeAttributes: RuntimeAttributes = new RuntimeAttributes(Map.empty), + commandTemplateString: String = "!!shazam!!", + outputs: Seq[DeclarationMockType] = Seq.empty + ) = { + val task = mock[Task] + task.declarations returns declarations + task.runtimeAttributes returns runtimeAttributes + task.commandTemplateString returns commandTemplateString + task.name returns name + task.unqualifiedName returns name + task.outputs returns (outputs map { + case (outputName, wdlType, expression) => TaskOutput(outputName, wdlType, expression, mock[Ast], Option(task)) + }) + task + } 
+} diff --git a/engine/src/test/scala/cromwell/engine/workflow/mocks/WdlExpressionMock.scala b/engine/src/test/scala/cromwell/engine/workflow/mocks/WdlExpressionMock.scala new file mode 100644 index 000000000..6bb15b306 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/mocks/WdlExpressionMock.scala @@ -0,0 +1,32 @@ +package cromwell.engine.workflow.mocks + +import org.specs2.mock.Mockito +import wdl4s.WdlExpression +import wdl4s.WdlExpression._ +import wdl4s.expression.WdlFunctions +import wdl4s.values.{WdlInteger, WdlString, WdlValue} + +import scala.util.Success + +trait WdlExpressionMock extends Mockito { + val helloStringExpression = { + val expression = mock[WdlExpression] + expression.valueString returns "hello" + expression.evaluate(any[ScopedLookupFunction], any[ WdlFunctions[WdlValue]]) returns Success(WdlString("hello")) + expression + } + + def mockStringExpression(value: String) = { + val expression = mock[WdlExpression] + expression.valueString returns value + expression.evaluate(any[ScopedLookupFunction], any[ WdlFunctions[WdlValue]]) returns Success(WdlString(value)) + expression + } + + def mockIntExpression(value: Int) = { + val expression = mock[WdlExpression] + expression.valueString returns value.toString + expression.evaluate(any[ScopedLookupFunction], any[ WdlFunctions[WdlValue]]) returns Success(WdlInteger(value)) + expression + } +} diff --git a/engine/src/test/scala/cromwell/jobstore/JobStoreServiceSpec.scala b/engine/src/test/scala/cromwell/jobstore/JobStoreServiceSpec.scala index fa1cc5067..ac84a6c45 100644 --- a/engine/src/test/scala/cromwell/jobstore/JobStoreServiceSpec.scala +++ b/engine/src/test/scala/cromwell/jobstore/JobStoreServiceSpec.scala @@ -1,6 +1,6 @@ package cromwell.jobstore -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec import cromwell.backend.BackendJobDescriptorKey import cromwell.core.{JobOutput, WorkflowId} import cromwell.jobstore.JobStoreActor._ @@ -8,9 +8,10 @@ import 
cromwell.jobstore.JobStoreServiceSpec._ import cromwell.services.SingletonServicesStore import org.scalatest.Matchers import org.specs2.mock.Mockito +import wdl4s.parser.WdlParser.Ast import wdl4s.types.WdlStringType import wdl4s.values.WdlString -import wdl4s.{Call, Task, TaskOutput, WdlExpression} +import wdl4s._ import scala.concurrent.duration._ import scala.language.postfixOps @@ -20,7 +21,7 @@ object JobStoreServiceSpec { val EmptyExpression = WdlExpression.fromString(""" "" """) } -class JobStoreServiceSpec extends CromwellTestkitSpec with Matchers with Mockito { +class JobStoreServiceSpec extends CromwellTestKitSpec with Matchers with Mockito { "JobStoreService" should { "work" in { @@ -28,10 +29,10 @@ class JobStoreServiceSpec extends CromwellTestkitSpec with Matchers with Mockito val jobStoreService = system.actorOf(JobStoreActor.props(jobStore)) val workflowId = WorkflowId.randomId() - val successCall = mock[Call] + val successCall = mock[TaskCall] successCall.fullyQualifiedName returns "foo.bar" val mockTask = mock[Task] - mockTask.outputs returns Seq(TaskOutput("baz", WdlStringType, EmptyExpression)) + mockTask.outputs returns Seq(TaskOutput("baz", WdlStringType, EmptyExpression, mock[Ast], Option(mockTask))) successCall.task returns mockTask val successKey = BackendJobDescriptorKey(successCall, None, 1).toJobStoreKey(workflowId) @@ -49,7 +50,7 @@ class JobStoreServiceSpec extends CromwellTestkitSpec with Matchers with Mockito case JobComplete(JobResultSuccess(Some(0), os)) if os == outputs => } - val failureCall = mock[Call] + val failureCall = mock[TaskCall] failureCall.fullyQualifiedName returns "baz.qux" val failureKey = BackendJobDescriptorKey(failureCall, None, 1).toJobStoreKey(workflowId) diff --git a/engine/src/test/scala/cromwell/jobstore/JobStoreWriterSpec.scala b/engine/src/test/scala/cromwell/jobstore/JobStoreWriterSpec.scala index a6e59679a..eabc86a5e 100644 --- a/engine/src/test/scala/cromwell/jobstore/JobStoreWriterSpec.scala +++ 
b/engine/src/test/scala/cromwell/jobstore/JobStoreWriterSpec.scala @@ -1,7 +1,7 @@ package cromwell.jobstore import akka.testkit.TestFSMRef -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec import cromwell.core.WorkflowId import cromwell.jobstore.JobStoreActor.{JobStoreWriteSuccess, RegisterJobCompleted, RegisterWorkflowCompleted} import org.scalatest.{BeforeAndAfter, Matchers} @@ -11,7 +11,7 @@ import scala.concurrent.duration._ import scala.concurrent.{ExecutionContext, Future, Promise} import scala.language.postfixOps -class JobStoreWriterSpec extends CromwellTestkitSpec with Matchers with BeforeAndAfter { +class JobStoreWriterSpec extends CromwellTestKitSpec with Matchers with BeforeAndAfter { var database: WriteCountingJobStore = _ var jobStoreWriter: TestFSMRef[JobStoreWriterState, JobStoreWriterData, JobStoreWriterActor] = _ diff --git a/engine/src/test/scala/cromwell/subworkflowstore/SubWorkflowStoreSpec.scala b/engine/src/test/scala/cromwell/subworkflowstore/SubWorkflowStoreSpec.scala new file mode 100644 index 000000000..79e41bd70 --- /dev/null +++ b/engine/src/test/scala/cromwell/subworkflowstore/SubWorkflowStoreSpec.scala @@ -0,0 +1,87 @@ +package cromwell.subworkflowstore + +import cromwell.CromwellTestKitSpec +import cromwell.core.{JobKey, WorkflowId, WorkflowSourceFilesWithoutImports} +import cromwell.services.SingletonServicesStore +import cromwell.subworkflowstore.SubWorkflowStoreActor._ +import org.scalatest.Matchers +import org.specs2.mock.Mockito +import wdl4s.{TaskCall, WdlExpression} +import cromwell.core.ExecutionIndex._ + +import scala.concurrent.duration._ +import SubWorkflowStoreSpec._ +import akka.testkit.TestProbe +import cromwell.database.sql.tables.SubWorkflowStoreEntry +import cromwell.engine.workflow.workflowstore.WorkflowStoreActor.{SubmitWorkflow, WorkflowSubmittedToStore} +import cromwell.engine.workflow.workflowstore.{SqlWorkflowStore, WorkflowStoreActor} + +import scala.language.postfixOps + +object 
SubWorkflowStoreSpec { + val MaxWait = 5 seconds + val EmptyExpression = WdlExpression.fromString(""" "" """) +} + +class SubWorkflowStoreSpec extends CromwellTestKitSpec with Matchers with Mockito { + "SubWorkflowStore" should { + "work" in { + lazy val subWorkflowStore = new SqlSubWorkflowStore(SingletonServicesStore.databaseInterface) + val subWorkflowStoreService = system.actorOf(SubWorkflowStoreActor.props(subWorkflowStore)) + + lazy val workflowStore = SqlWorkflowStore(SingletonServicesStore.databaseInterface) + val workflowStoreService = system.actorOf(WorkflowStoreActor.props(workflowStore, TestProbe().ref)) + + val parentWorkflowId = WorkflowId.randomId() + val subWorkflowId = WorkflowId.randomId() + val subSubWorkflowId = WorkflowId.randomId() + val call = mock[TaskCall] + call.fullyQualifiedName returns "foo.bar" + val jobKey = new JobKey { + override def scope = call + override def index: Option[Int] = None + override def attempt: Int = 0 + override def tag: String = "foobar" + } + + workflowStoreService ! SubmitWorkflow(WorkflowSourceFilesWithoutImports("", "{}", "{}")) + val rootWorkflowId = expectMsgType[WorkflowSubmittedToStore](10 seconds).workflowId + + // Query for non existing sub workflow + subWorkflowStoreService ! QuerySubWorkflow(parentWorkflowId, jobKey) + expectMsgType[SubWorkflowNotFound](MaxWait) + + // Register sub workflow + subWorkflowStoreService ! RegisterSubWorkflow(rootWorkflowId, parentWorkflowId, jobKey, subWorkflowId) + expectMsgType[SubWorkflowStoreRegisterSuccess](MaxWait) + + // Query for sub workflow + subWorkflowStoreService ! QuerySubWorkflow(parentWorkflowId, jobKey) + val subWorkflowEntry = SubWorkflowStoreEntry(Option(0), parentWorkflowId.toString, jobKey.scope.fullyQualifiedName, jobKey.index.fromIndex, jobKey.attempt, subWorkflowId.toString, Some(0)) + expectMsg[SubWorkflowFound](SubWorkflowFound(subWorkflowEntry)) + + // Register sub sub workflow + subWorkflowStoreService ! 
RegisterSubWorkflow(rootWorkflowId, subWorkflowId, jobKey, subSubWorkflowId) + expectMsgType[SubWorkflowStoreRegisterSuccess](MaxWait) + + // Query for sub sub workflow + subWorkflowStoreService ! QuerySubWorkflow(subWorkflowId, jobKey) + val subSubWorkflowEntry = SubWorkflowStoreEntry(Option(0), subWorkflowId.toString, jobKey.scope.fullyQualifiedName, jobKey.index.fromIndex, jobKey.attempt, subSubWorkflowId.toString, Some(1)) + expectMsg[SubWorkflowFound](SubWorkflowFound(subSubWorkflowEntry)) + + // Delete root workflow + subWorkflowStoreService ! WorkflowComplete(rootWorkflowId) + expectMsgType[SubWorkflowStoreCompleteSuccess](MaxWait) + + // Verify that everything is gone + subWorkflowStoreService ! QuerySubWorkflow(rootWorkflowId, jobKey) + expectMsgType[SubWorkflowNotFound](MaxWait) + + subWorkflowStoreService ! QuerySubWorkflow(parentWorkflowId, jobKey) + expectMsgType[SubWorkflowNotFound](MaxWait) + + subWorkflowStoreService ! QuerySubWorkflow(subWorkflowId, jobKey) + expectMsgType[SubWorkflowNotFound](MaxWait) + } + } +} diff --git a/engine/src/test/scala/cromwell/webservice/CromwellApiServiceSpec.scala b/engine/src/test/scala/cromwell/webservice/CromwellApiServiceSpec.scala index 9904835dc..b3fbfee1e 100644 --- a/engine/src/test/scala/cromwell/webservice/CromwellApiServiceSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/CromwellApiServiceSpec.scala @@ -7,7 +7,7 @@ import akka.actor.{Actor, Props} import akka.pattern.ask import akka.util.Timeout import com.typesafe.config.ConfigFactory -import cromwell.CromwellTestkitSpec +import cromwell.CromwellTestKitSpec import cromwell.core._ import cromwell.engine.workflow.workflowstore.WorkflowStoreActor import cromwell.engine.workflow.workflowstore.WorkflowStoreActor.{WorkflowAborted => _, _} @@ -15,7 +15,6 @@ import cromwell.server.{CromwellServerActor, CromwellSystem} import cromwell.services.metadata.MetadataService._ import cromwell.services.metadata._ import 
cromwell.services.metadata.impl.MetadataSummaryRefreshActor.MetadataSummarySuccess -import cromwell.util.SampleWdl.DeclarationsWorkflow._ import cromwell.util.SampleWdl.HelloWorld import org.scalatest.concurrent.{PatienceConfiguration, ScalaFutures} import org.scalatest.{FlatSpec, Matchers} @@ -67,7 +66,7 @@ class CromwellApiServiceSpec extends FlatSpec with CromwellApiService with Scala implicit val defaultTimeout = RouteTestTimeout(30.seconds.dilated) override def actorRefFactory = system - override val serviceRegistryActor = CromwellTestkitSpec.ServiceRegistryActorInstance + override val serviceRegistryActor = CromwellTestKitSpec.ServiceRegistryActorInstance override val workflowStoreActor = actorRefFactory.actorOf(Props(new MockWorkflowStoreActor())) override val workflowManagerActor = actorRefFactory.actorOf(Props.empty) @@ -243,7 +242,8 @@ class CromwellApiServiceSpec extends FlatSpec with CromwellApiService with Scala behavior of "REST API submission endpoint" it should "return 201 for a successful workflow submission " in { - Post(s"/workflows/$version", FormData(Seq("wdlSource" -> HelloWorld.wdlSource(), "workflowInputs" -> HelloWorld.rawInputs.toJson.toString()))) ~> + val bodyParts: Map[String, BodyPart] = Map("wdlSource" -> BodyPart(HelloWorld.wdlSource()), "workflowInputs" -> BodyPart(HelloWorld.rawInputs.toJson.toString())) + Post(s"/workflows/$version", MultipartFormData(bodyParts)) ~> submitRoute ~> check { assertResult( @@ -261,7 +261,7 @@ class CromwellApiServiceSpec extends FlatSpec with CromwellApiService with Scala it should "succesfully merge and override multiple input files" in { val input1 = Map("wf.a1" -> "hello", "wf.a2" -> "world").toJson.toString - val input2 = Map.empty.toJson.toString + val input2 = Map.empty[String, String].toJson.toString val overrideInput1 = Map("wf.a2" -> "universe").toJson.toString val allInputs = mergeMaps(Seq(Option(input1), Option(input2), Option(overrideInput1))) @@ -274,9 +274,9 @@ class 
CromwellApiServiceSpec extends FlatSpec with CromwellApiService with Scala behavior of "REST API batch submission endpoint" it should "return 200 for a successful workflow submission " in { val inputs = HelloWorld.rawInputs.toJson + val bodyParts = Map("wdlSource" -> BodyPart(HelloWorld.wdlSource()), "workflowInputs" -> BodyPart(s"[$inputs, $inputs]")) - Post(s"/workflows/$version/batch", - FormData(Seq("wdlSource" -> HelloWorld.wdlSource(), "workflowInputs" -> s"[$inputs, $inputs]"))) ~> + Post(s"/workflows/$version/batch", MultipartFormData(bodyParts)) ~> submitBatchRoute ~> check { assertResult( diff --git a/engine/src/test/scala/cromwell/webservice/EngineStatsActorSpec.scala b/engine/src/test/scala/cromwell/webservice/EngineStatsActorSpec.scala index 021e4a43c..b32ceb1c9 100644 --- a/engine/src/test/scala/cromwell/webservice/EngineStatsActorSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/EngineStatsActorSpec.scala @@ -14,22 +14,22 @@ class EngineStatsActorSpec extends TestKitSuite("EngineStatsActor") with FlatSpe behavior of "EngineStatsActor" val replyTo = TestProbe() - val defaultTimeout = 100 millis + val defaultTimeout = 500 millis it should "return double zeros with no WorkflowActors" in { - TestActorRef(EngineStatsActor.props(List.empty[ActorRef], replyTo.ref)) + TestActorRef(EngineStatsActor.props(List.empty[ActorRef], replyTo.ref, timeout = 200 millis)) replyTo.expectMsg(defaultTimeout, EngineStats(0, 0)) } it should "return snakeyes with a single workflow with one job" in { val workflowActors = List(Props(FakeWorkflowActor(1))) map { TestActorRef(_) } - TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref)) + TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref, timeout = 200 millis)) replyTo.expectMsg(defaultTimeout, EngineStats(1, 1)) } it should "return an unemployed workflow when that's the world it lives in" in { val workflowActors = List(Props(FakeWorkflowActor(0))) map { TestActorRef(_) } - 
TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref)) + TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref, timeout = 200 millis)) replyTo.expectMsg(defaultTimeout, EngineStats(1, 0)) } @@ -41,7 +41,7 @@ class EngineStatsActorSpec extends TestKitSuite("EngineStatsActor") with FlatSpe it should "return the summation of jobs for all WorkflowActors" in { val workflowActors = List(Props(FakeWorkflowActor(1)), Props(FakeWorkflowActor(2))) map { TestActorRef(_) } - TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref)) + TestActorRef(EngineStatsActor.props(workflowActors, replyTo.ref, timeout = 200 millis)) replyTo.expectMsg(defaultTimeout, EngineStats(2, 3)) } } diff --git a/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala b/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala index 5ae82221b..9ecfeea42 100644 --- a/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala +++ b/engine/src/test/scala/cromwell/webservice/MetadataBuilderActorSpec.scala @@ -22,15 +22,14 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik behavior of "MetadataParser" - val defaultTimeout = 100 millis + val defaultTimeout = 200 millis val mockServiceRegistry = TestProbe() - val parentProbe = TestProbe() - def assertMetadataResponse(action: MetadataServiceAction, queryReply: MetadataQuery, events: Seq[MetadataEvent], expectedRes: String) = { + val parentProbe = TestProbe() val metadataBuilder = TestActorRef(MetadataBuilderActor.props(mockServiceRegistry.ref), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") metadataBuilder ! 
action // Ask for everything mockServiceRegistry.expectMsg(defaultTimeout, action) // TestActor runs on CallingThreadDispatcher @@ -96,7 +95,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik | "id": "$workflowA" |}""".stripMargin - val mdQuery = MetadataQuery(workflowA, None, None, None, None) + val mdQuery = MetadataQuery(workflowA, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, workflowAEvents, expectedRes) } @@ -113,8 +112,8 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik val events = eventList map { e => (e._1, MetadataValue(e._2), e._3) } map Function.tupled(makeEvent(workflow)) val expectedRes = s"""{ "calls": {}, $expectedJson, "id":"$workflow" }""" - val mdQuery = MetadataQuery(workflow, None, None, None, None) - val queryAction = GetSingleWorkflowMetadataAction(workflow, None, None) + val mdQuery = MetadataQuery(workflow, None, None, None, None, expandSubWorkflows = false) + val queryAction = GetSingleWorkflowMetadataAction(workflow, None, None, expandSubWorkflows = false) assertMetadataResponse(queryAction, mdQuery, events, expectedRes) } @@ -305,7 +304,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik | } """.stripMargin - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, events, expectedResponse) } @@ -326,7 +325,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik |} """.stripMargin - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) 
assertMetadataResponse(queryAction, mdQuery, events, expectedResponse) } @@ -346,14 +345,14 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik |} """.stripMargin - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, events, expectedResponse) } it should "render empty Json" in { val workflowId = WorkflowId.randomId() - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) val expectedEmptyResponse = """{}""" assertMetadataResponse(queryAction, mdQuery, List.empty, expectedEmptyResponse) @@ -383,7 +382,7 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik |} """.stripMargin - val mdQuery = MetadataQuery(workflowId, None, None, None, None) + val mdQuery = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) val queryAction = GetMetadataQueryAction(mdQuery) assertMetadataResponse(queryAction, mdQuery, emptyEvents, expectedEmptyResponse) @@ -398,4 +397,98 @@ class MetadataBuilderActorSpec extends TestKitSuite("Metadata") with FlatSpecLik assertMetadataResponse(queryAction, mdQuery, valueEvents, expectedNonEmptyResponse) } + + it should "expand sub workflow metadata when asked for" in { + val mainWorkflowId = WorkflowId.randomId() + val subWorkflowId = WorkflowId.randomId() + + val mainEvents = List( + MetadataEvent(MetadataKey(mainWorkflowId, Option(MetadataJobKey("callA", None, 1)), "subWorkflowId"), MetadataValue(subWorkflowId)) + ) + + val subEvents = List( + MetadataEvent(MetadataKey(mainWorkflowId, None, "some"), MetadataValue("sub workflow info")) + ) + + val mainQuery = MetadataQuery(mainWorkflowId, None, None, None, None, 
expandSubWorkflows = true) + val mainQueryAction = GetMetadataQueryAction(mainQuery) + + val subQuery = MetadataQuery(subWorkflowId, None, None, None, None, expandSubWorkflows = true) + val subQueryAction = GetMetadataQueryAction(subQuery) + + val parentProbe = TestProbe() + val metadataBuilder = TestActorRef(MetadataBuilderActor.props(mockServiceRegistry.ref), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") + metadataBuilder ! mainQueryAction + mockServiceRegistry.expectMsg(defaultTimeout, mainQueryAction) + mockServiceRegistry.reply(MetadataLookupResponse(mainQuery, mainEvents)) + mockServiceRegistry.expectMsg(defaultTimeout, subQueryAction) + mockServiceRegistry.reply(MetadataLookupResponse(subQuery, subEvents)) + + val expandedRes = + s""" + |{ + | "calls": { + | "callA": [ + | { + | "subWorkflowMetadata": { + | "some": "sub workflow info", + | "calls": {}, + | "id": "$subWorkflowId" + | }, + | "attempt": 1, + | "shardIndex": -1 + | } + | ] + | }, + | "id": "$mainWorkflowId" + |} + """.stripMargin + + parentProbe.expectMsgPF(defaultTimeout) { + case response: RequestComplete[(StatusCode, JsObject)] @unchecked => + response.response._1 shouldBe StatusCodes.OK + response.response._2 shouldBe expandedRes.parseJson + } + } + + it should "NOT expand sub workflow metadata when NOT asked for" in { + val mainWorkflowId = WorkflowId.randomId() + val subWorkflowId = WorkflowId.randomId() + + val mainEvents = List( + MetadataEvent(MetadataKey(mainWorkflowId, Option(MetadataJobKey("callA", None, 1)), "subWorkflowId"), MetadataValue(subWorkflowId)) + ) + + val queryNoExpand = MetadataQuery(mainWorkflowId, None, None, None, None, expandSubWorkflows = false) + val queryNoExpandAction = GetMetadataQueryAction(queryNoExpand) + + val parentProbe = TestProbe() + val metadataBuilder = TestActorRef(MetadataBuilderActor.props(mockServiceRegistry.ref), parentProbe.ref, s"MetadataActor-${UUID.randomUUID()}") + metadataBuilder ! 
queryNoExpandAction + mockServiceRegistry.expectMsg(defaultTimeout, queryNoExpandAction) + mockServiceRegistry.reply(MetadataLookupResponse(queryNoExpand, mainEvents)) + + + val nonExpandedRes = + s""" + |{ + | "calls": { + | "callA": [ + | { + | "subWorkflowId": "$subWorkflowId", + | "attempt": 1, + | "shardIndex": -1 + | } + | ] + | }, + | "id": "$mainWorkflowId" + |} + """.stripMargin + + parentProbe.expectMsgPF(defaultTimeout) { + case response: RequestComplete[(StatusCode, JsObject)] @unchecked => + response.response._1 shouldBe StatusCodes.OK + response.response._2 shouldBe nonExpandedRes.parseJson + } + } } diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/ContentTypeOption.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/ContentTypeOption.scala deleted file mode 100644 index e6f83b0e4..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/ContentTypeOption.scala +++ /dev/null @@ -1,15 +0,0 @@ -package cromwell.filesystems.gcs - -import java.nio.file.OpenOption - -object ContentTypeOption { - sealed trait ContentType - case object PlainText extends ContentType with OpenOption { - override def toString = "plain/text" - } - case object Json extends ContentType with OpenOption { - override def toString = "application/json" - } -} - - diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileAttributes.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileAttributes.scala deleted file mode 100644 index 5d45641de..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileAttributes.scala +++ /dev/null @@ -1,23 +0,0 @@ -package cromwell.filesystems.gcs - -import java.nio.file.attribute.{BasicFileAttributes, FileTime} - -import com.google.api.services.storage.Storage -import com.google.api.services.storage.model.StorageObject -import org.apache.commons.codec.digest.DigestUtils - -class GcsFileAttributes(path: NioGcsPath, storageClient: Storage) extends 
BasicFileAttributes { - override def fileKey(): AnyRef = DigestUtils.md5Hex(path.toString) - override def isRegularFile: Boolean = throw new NotImplementedError("To be implemented when/if needed") - override def isOther: Boolean = throw new NotImplementedError("To be implemented when/if needed") - override def lastModifiedTime(): FileTime = throw new NotImplementedError("To be implemented when/if needed") - override def size(): Long = { - val getObject = storageClient.objects.get(path.bucket, path.objectName) - val storageObject: StorageObject = getObject.execute() - storageObject.getSize.longValue() - } - override def isDirectory: Boolean = path.isDirectory - override def isSymbolicLink: Boolean = false - override def creationTime(): FileTime = throw new NotImplementedError("To be implemented when/if needed") - override def lastAccessTime(): FileTime = throw new NotImplementedError("To be implemented when/if needed") -} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystem.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystem.scala deleted file mode 100644 index 215b18935..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystem.scala +++ /dev/null @@ -1,68 +0,0 @@ -package cromwell.filesystems.gcs - -import java.lang.Iterable -import java.nio.file._ -import java.nio.file.attribute.UserPrincipalLookupService -import java.nio.file.spi.FileSystemProvider -import java.util.{Collections, Set => JSet} - - -case class NotAGcsPathException(path: String) extends IllegalArgumentException(s"$path is not a valid GCS path.") - -object GcsFileSystem { - val Separator = "/" - private[gcs] val Scheme = "gs" - private[gcs] val Protocol = s"$Scheme://" - private val GsUriRegex = s"""$Protocol(.*)""".r - private val AttributeViews = Collections.singleton("basic") - - def isAbsoluteGcsPath(str: String) = str match { - case GsUriRegex(chunks) => true - case _ => false - } - - def apply(provider: 
GcsFileSystemProvider) = new GcsFileSystem(provider) -} - -/** - * Implements the java.nio.FileSystem interface for GoogleCloudStorage. - */ -class GcsFileSystem private(val gcsFileSystemProvider: GcsFileSystemProvider) extends FileSystem { - - import GcsFileSystem._ - - override def supportedFileAttributeViews(): JSet[String] = AttributeViews - - override def getSeparator: String = Separator - - override def getRootDirectories: Iterable[Path] = Collections.emptyList[Path] - - override def newWatchService(): WatchService = throw new NotImplementedError("GCS FS does not support Watch Service at this time") - - override def getFileStores: Iterable[FileStore] = Collections.emptyList() - - override def isReadOnly: Boolean = false - - override def provider(): FileSystemProvider = gcsFileSystemProvider - - override def isOpen: Boolean = true - - override def close(): Unit = throw new UnsupportedOperationException("GCS FS cannot be closed") - - override def getPathMatcher(syntaxAndPattern: String): PathMatcher = FileSystems.getDefault.getPathMatcher(syntaxAndPattern) - - override def getUserPrincipalLookupService: UserPrincipalLookupService = throw new UnsupportedOperationException() - - private def buildPath(first: String, more: Seq[String], forceDirectory: Boolean) = { - val directory = forceDirectory || (more.isEmpty && first.endsWith(Separator)) || more.lastOption.exists(_.endsWith(Separator)) - first match { - case GsUriRegex(chunks) => new NioGcsPath(chunks.split(Separator) ++ more.toArray[String], true, directory)(this) - case empty if empty.isEmpty => new NioGcsPath(Array.empty[String] ++ more.toArray[String], false, false)(this) - case _ => throw NotAGcsPathException(s"$first is not a gcs path") - } - } - - override def getPath(first: String, more: String*): Path = buildPath(first, more, forceDirectory = false) - - def getPathAsDirectory(first: String, more: String*): Path = buildPath(first, more, forceDirectory = true) -} diff --git 
a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystemProvider.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystemProvider.scala deleted file mode 100644 index 845ec29ef..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsFileSystemProvider.scala +++ /dev/null @@ -1,295 +0,0 @@ -package cromwell.filesystems.gcs - -import java.io.{FileNotFoundException, OutputStream} -import java.net.URI -import java.nio.channels.{Channels, SeekableByteChannel} -import java.nio.file.DirectoryStream.Filter -import java.nio.file._ -import java.nio.file.attribute.{BasicFileAttributes, FileAttribute, FileAttributeView} -import java.nio.file.spi.FileSystemProvider -import java.util -import java.util.Collections -import java.util.concurrent.{AbstractExecutorService, TimeUnit} - -import cats.instances.try_._ -import cats.syntax.functor._ -import com.google.api.client.googleapis.json.GoogleJsonResponseException -import com.google.api.client.googleapis.media.MediaHttpUploader -import com.google.api.services.storage.Storage -import com.google.api.services.storage.model.StorageObject -import com.google.cloud.hadoop.gcsio.{GoogleCloudStorageReadChannel, GoogleCloudStorageWriteChannel, ObjectWriteConditions} -import com.google.cloud.hadoop.util.{ApiErrorExtractor, AsyncWriteChannelOptions, ClientRequestHelper} -import com.typesafe.config.{Config, ConfigFactory, ConfigMemorySize} -import net.ceedubs.ficus.Ficus._ -import net.ceedubs.ficus.readers.ValueReader - -import scala.annotation.tailrec -import scala.collection.JavaConverters._ -import scala.concurrent.duration._ -import scala.concurrent.{ExecutionContext, ExecutionContextExecutorService} -import scala.language.postfixOps -import scala.util.{Failure, Success, Try} - -object GcsFileSystemProvider { - def apply(storageClient: Storage)(implicit ec: ExecutionContext) = { - new GcsFileSystemProvider(Success(storageClient), ec) - } - - object AcceptAllFilter extends 
DirectoryStream.Filter[Path] { - override def accept(entry: Path): Boolean = true - } - - // To choose these numbers I first entered a prolonged period of personal consideration and deep thought. - // Then, at the end of this time, I decided to just pick some numbers arbitrarily. - private val retryInterval = 500 milliseconds - private val retryCount = 3 - - def withRetry[A](f: => A, retries: Int = retryCount): A = Try(f) match { - case Success(res) => res - case Failure(ex: GoogleJsonResponseException) - if retries > 0 && - (ex.getStatusCode == 404 || ex.getStatusCode == 500) => - // FIXME remove this sleep - Thread.sleep(retryInterval.toMillis) - withRetry(f, retries - 1) - case Failure(ex) => throw ex - } - - // TODO refactor as part of Ficus and submit a PR - implicit val configMemorySizeValueReader: ValueReader[ConfigMemorySize] = new ValueReader[ConfigMemorySize] { - override def read(config: Config, path: String): ConfigMemorySize = config.getMemorySize(path) - } -} - -/** - * Converts a Scala ExecutionContext to a Java ExecutorService. 
- * https://groups.google.com/forum/#!topic/scala-user/ZyHrfzD7eX8 - */ -object ExecutionContextExecutorServiceBridge { - def apply(ec: ExecutionContext): ExecutionContextExecutorService = ec match { - case null => throw new RuntimeException("Execution context cannot be null") - case eces: ExecutionContextExecutorService => eces - case executionContext => new AbstractExecutorService with ExecutionContextExecutorService { - override def prepare(): ExecutionContext = executionContext - override def isShutdown = false - override def isTerminated = false - override def shutdown() = () - override def shutdownNow() = Collections.emptyList[Runnable] - override def execute(runnable: Runnable): Unit = executionContext execute runnable - override def reportFailure(t: Throwable): Unit = executionContext reportFailure t - override def awaitTermination(length: Long,unit: TimeUnit): Boolean = false - } - } -} - -/** - * Implements java.nio.FileSystemProvider for GoogleCloudStorage - * This implementation is not complete and mostly a proof of concept that it's possible to *copy* around files from/to local/gcs. - * Copying is the only functionality that has been successfully tested (same and cross filesystems). - * - * If/when switching to Google's GCS NIO implementation, callers may need to implement various utilities built into - * this implementation, including: - * - * - Minimizing the upload buffer size, assuming the default is also on the order of megabytes of memory per upload - * - Automatically retrying transient errors - * - etc. 
- * - * @param storageClient Google API Storage object - * @param executionContext executionContext, will be used to perform async writes to GCS after being converted to a Java execution service - */ -class GcsFileSystemProvider private[gcs](storageClient: Try[Storage], val executionContext: ExecutionContext) extends FileSystemProvider { - import GcsFileSystemProvider._ - - private[this] lazy val config = ConfigFactory.load() - - // We want to throw an exception here if we try to use this class with a failed gcs interface - lazy val client = storageClient.get - private val executionService = ExecutionContextExecutorServiceBridge(executionContext) - private val errorExtractor = new ApiErrorExtractor() - def notAGcsPath(path: Path) = throw new IllegalArgumentException(s"$path is not a GCS path.") - - lazy val defaultFileSystem: GcsFileSystem = GcsFileSystem(this) - - private def exists(path: Path): Unit = path match { - case gcsPath: NioGcsPath => - val attempt: Try[Any] = Try(withRetry(client.objects.get(gcsPath.bucket, gcsPath.objectName).execute)) recover { - case ex: GoogleJsonResponseException - if ex.getStatusCode == 404 => if (!gcsPath.isDirectory) throw new FileNotFoundException(path.toString) - } - attempt.void.get - case _ => throw new FileNotFoundException(path.toString) - } - - /** - * Note: options and attributes are not honored. - */ - override def newByteChannel(path: Path, options: util.Set[_ <: OpenOption], attrs: FileAttribute[_]*): SeekableByteChannel = { - def createReadChannel(gcsPath: NioGcsPath) = new GoogleCloudStorageReadChannel(client, - gcsPath.bucket, - gcsPath.objectName, - errorExtractor, - new ClientRequestHelper[StorageObject]() - ) - - path match { - case gcsPath: NioGcsPath => withRetry(createReadChannel(gcsPath)) - case _ => notAGcsPath(path) - } - } - - /* - For now, default all upload buffers as small as possible, 256K per upload. Without this default the buffers are 64M. 
- In the future, we may possibly be able to pass information to the NioGcsPath with the expected... or Google's GCS NIO - implementation will be finished we'll need to revisit this issue again. - - See also: - - com.google.cloud.hadoop.util.AbstractGoogleAsyncWriteChannel.setUploadBufferSize - - com.google.api.client.googleapis.media.MediaHttpUploader.setContentAndHeadersOnCurrentRequest - */ - private[this] lazy val uploadBufferBytes = { - val configBytes = config.as[Option[ConfigMemorySize]]("google.upload-buffer-bytes").map(_.toBytes.toInt) - configBytes.getOrElse(MediaHttpUploader.MINIMUM_CHUNK_SIZE) - } - - /** - * Overrides the default implementation to provide a writable channel (which newByteChannel doesn't). - * NOTE: options are not honored. - */ - override def newOutputStream(path: Path, options: OpenOption*): OutputStream = { - val contentType = options collectFirst { - case e: ContentTypeOption.ContentType => e.toString - } getOrElse ContentTypeOption.PlainText.toString - - def initializeOutputStream(gcsPath: NioGcsPath) = { - val channel = new GoogleCloudStorageWriteChannel( - executionService, - client, - new ClientRequestHelper[StorageObject](), - gcsPath.bucket, - gcsPath.objectName, - AsyncWriteChannelOptions.newBuilder().setUploadBufferSize(uploadBufferBytes).build(), - new ObjectWriteConditions(), - Map.empty[String, String].asJava, - contentType) - channel.initialize() - Channels.newOutputStream(channel) - } - - path match { - case gcsPath: NioGcsPath => withRetry(initializeOutputStream(gcsPath)) - case _ => notAGcsPath(path) - } - } - - override def copy(source: Path, target: Path, options: CopyOption*): Unit = { - (source, target) match { - case (s: NioGcsPath, d: NioGcsPath) => - def innerCopy(): Unit = { - val storageObject = client.objects.get(s.bucket, s.objectName).execute - client.objects.copy(s.bucket, s.objectName, d.bucket, d.objectName, storageObject).execute - () - } - - withRetry(innerCopy()) - case _ => throw new 
UnsupportedOperationException(s"Can only copy from GCS to GCS: $source or $target is not a GCS path") - } - } - - override def delete(path: Path): Unit = { - path match { - case gcs: NioGcsPath => try { - withRetry { - client.objects.delete(gcs.bucket, gcs.objectName).execute() - () - } - } catch { - case ex: GoogleJsonResponseException if ex.getStatusCode == 404 => throw new NoSuchFileException(path.toString) - } - case _ => notAGcsPath(path) - } - } - - override def readAttributes[A <: BasicFileAttributes](path: Path, `type`: Class[A], options: LinkOption*): A = path match { - case gcsPath: NioGcsPath => - exists(path) - new GcsFileAttributes(gcsPath, client).asInstanceOf[A] - case _ => notAGcsPath(path) - } - - override def move(source: Path, target: Path, options: CopyOption*): Unit = { - (source, target) match { - case (s: NioGcsPath, d: NioGcsPath) => - def moveInner(): Unit = { - val storageObject = client.objects.get(s.bucket, s.objectName).execute - client.objects.rewrite(s.bucket, s.objectName, d.bucket, d.objectName, storageObject).execute - () - } - - withRetry(moveInner()) - case _ => throw new UnsupportedOperationException(s"Can only move from GCS to GCS: $source or $target is not a GCS path") - } - } - - def crc32cHash(path: Path) = path match { - case gcsDir: NioGcsPath => withRetry(client.objects().get(gcsDir.bucket, gcsDir.objectName).execute().getCrc32c) - case _ => notAGcsPath(path) - } - - override def checkAccess(path: Path, modes: AccessMode*): Unit = { exists(path); () } - override def createDirectory(dir: Path, attrs: FileAttribute[_]*): Unit = {} - - override def getFileSystem(uri: URI): FileSystem = defaultFileSystem - - override def isHidden(path: Path): Boolean = throw new NotImplementedError() - - private[this] lazy val maxResults = config.as[Option[Int]]("google.list-max-results").getOrElse(1000).toLong - - private def list(gcsDir: NioGcsPath) = { - val listRequest = client.objects().list(gcsDir.bucket).setMaxResults(maxResults) - 
listRequest.setPrefix(gcsDir.objectName) - - def objectToPath(storageObject: StorageObject): Path = { - NioGcsPath(s"$getScheme${storageObject.getBucket}${GcsFileSystem.Separator}${storageObject.getName}")(gcsDir.getFileSystem.asInstanceOf[GcsFileSystem]) - } - - // Contains a Seq corresponding to the current page of objects, plus a token for the next page of objects, if any. - case class ListPageResult(objects: Seq[StorageObject], nextPageToken: Option[String]) - - def requestListPage(pageToken: Option[String]): ListPageResult = { - val objects = withRetry(listRequest.setPageToken(pageToken.orNull).execute()) - ListPageResult(objects.getItems.asScala, Option(objects.getNextPageToken)) - } - - @tailrec - def remainingObjects(pageToken: Option[String], acc: Seq[StorageObject]): Seq[StorageObject] = { - if (pageToken.isEmpty) acc - else { - val page = requestListPage(pageToken) - remainingObjects(page.nextPageToken, acc ++ page.objects) - } - } - - val firstPage = requestListPage(pageToken = None) - val allObjects = remainingObjects(firstPage.nextPageToken, firstPage.objects) - - new DirectoryStream[Path] { - override def iterator(): util.Iterator[Path] = (allObjects map objectToPath).toIterator.asJava - override def close(): Unit = {} - } - } - - override def newDirectoryStream(dir: Path, filter: Filter[_ >: Path]): DirectoryStream[Path] = dir match { - case gcsDir: NioGcsPath => list(gcsDir) - case _ => notAGcsPath(dir) - } - override def setAttribute(path: Path, attribute: String, value: scala.Any, options: LinkOption*): Unit = throw new NotImplementedError() - override def getPath(uri: URI): Path = throw new NotImplementedError() - override def newFileSystem(uri: URI, env: util.Map[String, _]): FileSystem = { - throw new UnsupportedOperationException("GcsFileSystem provider doesn't support creation of new FileSystems at this time. 
Use getFileSystem instead.") - } - override def readAttributes(path: Path, attributes: String, options: LinkOption*): util.Map[String, AnyRef] = throw new NotImplementedError() - override def isSameFile(path: Path, path2: Path): Boolean = throw new NotImplementedError() - override def getFileAttributeView[V <: FileAttributeView](path: Path, `type`: Class[V], options: LinkOption*): V = throw new NotImplementedError() - override def getFileStore(path: Path): FileStore = throw new NotImplementedError() - override def getScheme: String = GcsFileSystem.Protocol -} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala new file mode 100644 index 000000000..0649da9b7 --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilder.scala @@ -0,0 +1,100 @@ +package cromwell.filesystems.gcs + +import java.net.URI +import java.nio.file.Path +import java.nio.file.spi.FileSystemProvider + +import akka.actor.ActorSystem +import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport +import com.google.api.client.json.jackson2.JacksonFactory +import com.google.cloud.RetryParams +import com.google.cloud.storage.StorageOptions +import com.google.cloud.storage.contrib.nio.{CloudStorageConfiguration, CloudStorageFileSystem, CloudStoragePath} +import com.google.common.base.Preconditions._ +import cromwell.core.WorkflowOptions +import cromwell.core.path.proxy.{PathProxy, RetryableFileSystemProviderProxy} +import cromwell.core.path.{CustomRetryParams, PathBuilder} +import cromwell.filesystems.gcs.auth.GoogleAuthMode + +import scala.util.{Failure, Try} + +object GcsPathBuilder { + + val JsonFactory = JacksonFactory.getDefaultInstance + val HttpTransport = GoogleNetHttpTransport.newTrustedTransport + + def checkValid(uri: URI) = { + checkNotNull(uri.getScheme, s"%s does not have a gcs scheme", uri) + checkArgument( + 
uri.getScheme.equalsIgnoreCase(CloudStorageFileSystem.URI_SCHEME), + "Cloud Storage URIs must have '%s' scheme: %s", + CloudStorageFileSystem.URI_SCHEME, + uri + ) + checkNotNull(uri.getHost, s"%s does not have a host", uri) + } + + def isValidGcsUrl(str: String): Boolean = { + Try(checkValid(URI.create(str))).isSuccess + } + + def isGcsPath(path: Path): Boolean = { + path.getFileSystem.provider().getScheme == CloudStorageFileSystem.URI_SCHEME + } +} + +class GcsPathBuilder(authMode: GoogleAuthMode, + retryParams: RetryParams, + cloudStorageConfiguration: CloudStorageConfiguration, + options: WorkflowOptions) extends PathBuilder { + authMode.validate(options) + + protected val storageOptions = StorageOptions.builder() + .authCredentials(authMode.authCredentials(options)) + .retryParams(retryParams) + .build() + + // The CloudStorageFileSystemProvider constructor is not public. Currently the only way to obtain one is through a CloudStorageFileSystem + // Moreover at this point we can use the same provider for all operations as we have usable credentials + // In order to avoid recreating a provider with every getPath call, create a dummy FileSystem just to get its provider + protected val _provider = CloudStorageFileSystem.forBucket("dummy", cloudStorageConfiguration, storageOptions).provider() + + protected def provider: FileSystemProvider = _provider + /* + * The StorageService already contains a StorageRpc object that contains a com.google.api.services.storage.Storage object + * However it is not accessible from StorageService. + * com.google.cloud.storage.Storage has some batching capabilities but not for copying. + * In order to support batch copy, we need a com.google.api.services.storage.Storage. 
+ */ + def getHash(path: Path): Try[String] = { + path match { + case gcsPath: CloudStoragePath => Try(storageOptions.service().get(gcsPath.bucket(), gcsPath.toRealPath().toString).crc32c()) + case proxy: PathProxy => + val gcsPath = proxy.unbox(classOf[CloudStoragePath]).get + Try(storageOptions.service().get(gcsPath.bucket(), gcsPath.toRealPath().toString).crc32c()) + case other => Failure(new IllegalArgumentException(s"$other is not a CloudStoragePath")) + } + } + + def build(string: String): Try[Path] = { + Try { + val uri = URI.create(string) + GcsPathBuilder.checkValid(uri) + provider.getPath(uri) + } + } + + override def name: String = "Gcs" +} + +class RetryableGcsPathBuilder(authMode: GoogleAuthMode, + googleRetryParams: RetryParams, + customRetryParams: CustomRetryParams, + cloudStorageConfiguration: CloudStorageConfiguration, + options: WorkflowOptions)(implicit actorSystem: ActorSystem) + extends GcsPathBuilder(authMode, googleRetryParams, cloudStorageConfiguration, options) { + + override protected def provider = new RetryableFileSystemProviderProxy(_provider, customRetryParams) + + override def getHash(path: Path) = provider.withRetry(() => super.getHash(path)) +} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala new file mode 100644 index 000000000..83aad3ce8 --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GcsPathBuilderFactory.scala @@ -0,0 +1,48 @@ +package cromwell.filesystems.gcs + +import akka.actor.ActorSystem +import com.google.api.client.googleapis.media.MediaHttpUploader +import com.google.cloud.RetryParams +import com.google.cloud.storage.contrib.nio.CloudStorageConfiguration +import com.typesafe.config.ConfigFactory +import cromwell.core.WorkflowOptions +import cromwell.core.path.{CustomRetryParams, PathBuilderFactory} +import cromwell.filesystems.gcs.auth.GoogleAuthMode +import 
net.ceedubs.ficus.Ficus._ + +object GcsPathBuilderFactory { + + private[this] lazy val UploadBufferBytes = { + ConfigFactory.load().as[Option[Int]]("google.upload-buffer-bytes").getOrElse(MediaHttpUploader.MINIMUM_CHUNK_SIZE) + } + + val DefaultRetryParams = RetryParams.defaultInstance() + val DefaultCloudStorageConfiguration = { + CloudStorageConfiguration.builder() + .blockSize(UploadBufferBytes) + .permitEmptyPathComponents(true) + .stripPrefixSlash(true) + .usePseudoDirectories(true) + .build() + } +} + +case class GcsPathBuilderFactory(authMode: GoogleAuthMode, + retryParams: RetryParams = GcsPathBuilderFactory.DefaultRetryParams, + cloudStorageConfiguration: CloudStorageConfiguration = GcsPathBuilderFactory.DefaultCloudStorageConfiguration) + + extends PathBuilderFactory { + + def withOptions(options: WorkflowOptions)(implicit actorSystem: ActorSystem) = new GcsPathBuilder(authMode, retryParams, cloudStorageConfiguration, options) +} + +case class RetryableGcsPathBuilderFactory(authMode: GoogleAuthMode, + googleRetryParams: RetryParams = GcsPathBuilderFactory.DefaultRetryParams, + customRetryParams: CustomRetryParams = CustomRetryParams.Default, + cloudStorageConfiguration: CloudStorageConfiguration = GcsPathBuilderFactory.DefaultCloudStorageConfiguration) + + + extends PathBuilderFactory { + + def withOptions(options: WorkflowOptions)(implicit actorSystem: ActorSystem) = new RetryableGcsPathBuilder(authMode, googleRetryParams, customRetryParams, cloudStorageConfiguration, options) +} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleAuthMode.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleAuthMode.scala deleted file mode 100644 index 2930cc911..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleAuthMode.scala +++ /dev/null @@ -1,186 +0,0 @@ -package cromwell.filesystems.gcs - -import java.io.{FileNotFoundException, IOException, InputStreamReader} -import java.nio.file.{Files, Paths} - 
-import com.google.api.client.auth.oauth2.Credential -import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp -import com.google.api.client.googleapis.auth.oauth2.{GoogleAuthorizationCodeFlow, GoogleClientSecrets, GoogleCredential} -import com.google.api.client.googleapis.extensions.java6.auth.oauth2.GooglePromptReceiver -import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport -import com.google.api.client.json.JsonFactory -import com.google.api.client.json.jackson2.JacksonFactory -import com.google.api.client.util.store.FileDataStoreFactory -import com.google.api.services.storage.{Storage, StorageScopes} -import cromwell.filesystems.gcs.GoogleAuthMode.{GcsScopes, GoogleAuthOptions} -import org.slf4j.LoggerFactory - -import scala.collection.JavaConverters._ -import scala.util.{Failure, Success, Try} - -object GoogleAuthMode { - - lazy val jsonFactory = JacksonFactory.getDefaultInstance - lazy val httpTransport = GoogleNetHttpTransport.newTrustedTransport - val RefreshTokenOptionKey = "refresh_token" - - /** - * Before it returns the raw credential, checks if the token will expire within 60 seconds. - * - * TODO: Needs more design / testing around thread safety. - * For example, the credential returned is mutable, and may be modified by another thread. - * - * Most Google clients have the ability to refresh tokens automatically, as they use the standard Google - * HttpTransport that automatically triggers credential refreshing via Credential.handleResponse. Since Cromwell - * contacts https://gcr.io directly via HTTP requests using spray-client, we need to keep the token fresh ourselves. 
- * - * @see Credential#handleResponse(HttpRequest, HttpResponse, boolean) - */ - implicit class EnhancedCredentials(val credential: Credential) extends AnyVal { - def freshCredential: Try[Credential] = { - val stillValid = Option(credential.getExpiresInSeconds).exists(_ > 60) - if (stillValid || credential.refreshToken()) { - Success(credential) - } else { - Failure(new Exception("Unable to refresh token")) - } - } - } - - def buildStorage(credential: Credential, applicationName: String) = { - new Storage.Builder( - httpTransport, - jsonFactory, - credential).setApplicationName(applicationName).build() - } - - trait GoogleAuthOptions { - def get(key: String): Try[String] - } - - val GcsScopes = List( - StorageScopes.DEVSTORAGE_FULL_CONTROL, - StorageScopes.DEVSTORAGE_READ_WRITE - ) -} - - -sealed trait GoogleAuthMode { - def credential(options: GoogleAuthOptions): Credential - - def assertWorkflowOptions(options: GoogleAuthOptions): Unit = () - - def name: String - - def requiresAuthFile: Boolean = false - - protected lazy val log = LoggerFactory.getLogger(getClass.getSimpleName) - - protected def validateCredentials(credential: Credential) = { - Try(credential.refreshToken()) match { - case Failure(ex) => throw new RuntimeException(s"Google credentials are invalid: ${ex.getMessage}") - case Success(_) => credential - } - } - - def buildStorage(options: GoogleAuthOptions, applicationName: String): Storage = { - GoogleAuthMode.buildStorage(credential(options), applicationName) - } -} - -final case class ServiceAccountMode(override val name: String, accountId: String, pemPath: String, scopes: List[String] = GcsScopes) extends GoogleAuthMode { - import GoogleAuthMode._ - - private lazy val credentials: Credential = { - val pemFile = Paths.get(pemPath).toAbsolutePath - if (!Files.exists(pemFile)) { - throw new FileNotFoundException(s"PEM file $pemFile does not exist") - } - validateCredentials( - new GoogleCredential.Builder().setTransport(httpTransport) - 
.setJsonFactory(jsonFactory) - .setServiceAccountId(accountId) - .setServiceAccountScopes(scopes.asJava) - .setServiceAccountPrivateKeyFromPemFile(pemFile.toFile) - .build() - ) - } - - override def credential(options: GoogleAuthOptions) = credentials -} - -final case class UserMode(override val name: String, user: String, secretsFile: String, datastoreDir: String, scopes: List[String] = GcsScopes) extends GoogleAuthMode { - import GoogleAuthMode._ - - private def filePathToSecrets(secrets: String, jsonFactory: JsonFactory) = { - val secretsPath = Paths.get(secrets).toAbsolutePath - if(!Files.isReadable(secretsPath)) { - log.warn("Secrets file does not exist or is not readable.") - } - val secretStream = new InputStreamReader(Files.newInputStream(secretsPath)) - - GoogleClientSecrets.load(jsonFactory, secretStream) - } - - private lazy val credentials: Credential = { - val clientSecrets = filePathToSecrets(secretsFile, jsonFactory) - val dataStore = Paths.get(datastoreDir).toAbsolutePath - val dataStoreFactory = new FileDataStoreFactory(dataStore.toFile) - val flow = new GoogleAuthorizationCodeFlow.Builder(httpTransport, - jsonFactory, - clientSecrets, - scopes.asJava).setDataStoreFactory(dataStoreFactory).build - validateCredentials(new AuthorizationCodeInstalledApp(flow, new GooglePromptReceiver).authorize(user)) - } - - override def credential(options: GoogleAuthOptions) = credentials -} - -// It would be goofy to have multiple auths that are application_default, but Cromwell won't prevent it. 
-final case class ApplicationDefaultMode(override val name: String, scopes: List[String] = GcsScopes) extends GoogleAuthMode { - import GoogleAuthMode._ - - private lazy val credentials: Credential = { - try { - validateCredentials(GoogleCredential.getApplicationDefault().createScoped(scopes.asJava)) - } catch { - case e: IOException => - log.warn("Failed to get application default credentials", e) - throw e - } - } - - override def credential(options: GoogleAuthOptions) = credentials -} - -final case class RefreshTokenMode(name: String, clientId: String, clientSecret: String) extends GoogleAuthMode with ClientSecrets { - import GoogleAuthMode._ - - override def requiresAuthFile = true - - /** - * Throws if the refresh token is not specified. - */ - override def assertWorkflowOptions(options: GoogleAuthOptions): Unit = { getToken(options); () } - - private def getToken(options: GoogleAuthOptions): String = { - options.get(RefreshTokenOptionKey).getOrElse(throw new IllegalArgumentException(s"Missing parameters in workflow options: $RefreshTokenOptionKey")) - } - - override def credential(options: GoogleAuthOptions): Credential = { - validateCredentials( - new GoogleCredential.Builder().setTransport(httpTransport) - .setJsonFactory(jsonFactory) - .setClientSecrets(clientId, clientSecret) - .build() - .setRefreshToken(getToken(options)) - ) - } -} - -trait ClientSecrets { - val clientId: String - val clientSecret: String -} - -final case class SimpleClientSecrets(clientId: String, clientSecret: String) extends ClientSecrets diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala index 9c5579839..8fa93b61d 100644 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/GoogleConfiguration.scala @@ -7,14 +7,12 @@ import cats.syntax.traverse._ import 
cats.syntax.validated._ import com.google.api.services.storage.StorageScopes import com.typesafe.config.Config +import cromwell.filesystems.gcs.auth._ import lenthall.config.ConfigValidationException import lenthall.config.ValidatedConfig._ import cromwell.core.ErrorOr._ import org.slf4j.LoggerFactory -import scala.collection.JavaConverters._ - - final case class GoogleConfiguration private (applicationName: String, authsByName: Map[String, GoogleAuthMode]) { def auth(name: String): ErrorOr[GoogleAuthMode] = { @@ -28,15 +26,15 @@ final case class GoogleConfiguration private (applicationName: String, authsByNa } object GoogleConfiguration { - + import scala.collection.JavaConverters._ private val log = LoggerFactory.getLogger("GoogleConfiguration") - private val GoogleScopes = List( + val GoogleScopes = List( StorageScopes.DEVSTORAGE_FULL_CONTROL, StorageScopes.DEVSTORAGE_READ_WRITE, "https://www.googleapis.com/auth/genomics", "https://www.googleapis.com/auth/compute" - ) + ).asJava def apply(config: Config): GoogleConfiguration = { @@ -55,10 +53,10 @@ object GoogleConfiguration { } def refreshTokenAuth(authConfig: Config, name: String) = authConfig validateAny { - cfg => RefreshTokenMode(name, cfg.getString("client-id"), cfg.getString("client-secret")) + cfg => RefreshTokenMode(name, cfg.getString("client-id"), cfg.getString("client-secret"), GoogleScopes) } - def applicationDefaultAuth(name: String): ErrorOr[GoogleAuthMode] = ApplicationDefaultMode(name, GoogleScopes).validNel + def applicationDefaultAuth(name: String): ErrorOr[GoogleAuthMode] = ApplicationDefaultMode(name).validNel val name = authConfig.getString("name") val scheme = authConfig.getString("scheme") diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/NioGcsPath.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/NioGcsPath.scala deleted file mode 100644 index 65e148f77..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/NioGcsPath.scala +++ 
/dev/null @@ -1,191 +0,0 @@ -package cromwell.filesystems.gcs - -import java.io.File -import java.net.URI -import java.nio.file.WatchEvent.{Kind, Modifier} -import java.nio.file._ -import java.util - -import scala.collection.JavaConverters._ -import scala.language.postfixOps -import scala.util.Try - -object NioGcsPath { - def apply(path: String)(implicit gcsFileSystem: GcsFileSystem) = gcsFileSystem.getPath(path) - - implicit class PathEnhanced(val path: Path) extends AnyVal { - def asGcsPath(implicit gcsFileSystem: GcsFileSystem) = path match { - case gcsPath: NioGcsPath => gcsPath - case otherPath: Path => getSoftPath(otherPath.toString).asInstanceOf[NioGcsPath] - case _ => throw new IllegalArgumentException("Only GcsPaths are supported.") - } - } - - /** Allow instantiation of a relative gcs path. - * Relative GCS paths can only be created via NioGcsPath methods (eg: subpath, getName...) but not through the GcsFileSystem.getPath method - * in order to avoid floating paths without root. It also ensures that a relative local path cannot mistakenly be parsed as a GCS path. - * */ - private def getSoftPath(first: String, more: String*)(implicit gcsFs: GcsFileSystem): Path = Try(gcsFs.getPath(first, more: _*)) recover { - case e: NotAGcsPathException => new NioGcsPath(first.split(GcsFileSystem.Separator) ++ more.toArray[String], false, first.endsWith(GcsFileSystem.Separator))(gcsFs) - } get - - val Protocol = GcsFileSystem.Protocol -} - -/** - * NOTE: Currently called NioGcsPath so it can exist alongside the current GcsPath class. - * If this approach was to be validated the current GcsPath class would be replaced by this one. - * This class proposes an implementation of the java.nio.Path interface for GoogleCloudStorage. 
- * The following methods are yet to be implemented: - * relativize - * compareTo - * @param chunks array containing all parts of the path in between separators - except the protocol (gs://) - * eg: gs://path/to/resource.txt -> chunks = [path, to, resource.txt] - * @param absolute true if this path is to be considered absolute. - * Only absolute GCS paths can be used to actually locate resources. - * Calling methods on an absolute path can return a relative paths (eg subpath). - * @param gcsFileSystem the gcsFileSystem to be used when performing operations on this path - */ -class NioGcsPath(private val chunks: Array[String], absolute: Boolean, val isDirectory: Boolean)(implicit gcsFileSystem: GcsFileSystem) extends Path { - import NioGcsPath._ - - private val separator = GcsFileSystem.Separator - - private val objectChunks = chunks match { - case values if isAbsolute && values.nonEmpty => values.tail - case _ => chunks - } - - private val fullPath = chunksToString(chunks) - - lazy val bucket: String = chunks match { - case values if values.isEmpty && isAbsolute => throw new IllegalStateException("An absolute gcs path cannot be empty") - case _ => if(isAbsolute) chunks.head else { - throw new UnsupportedOperationException("Attached gcs filesystem has no root and is not Absolute. 
The corresponding bucket is unknown.") - } - } - - val objectName = chunksToString(objectChunks) - - private def chunksToString(chunksArray: Array[String]): String = chunksArray.mkString(separator) - - override def subpath(beginIndex: Int, endIndex: Int): Path = { - val directory = if (endIndex == chunks.length - 1) isDirectory else true - new NioGcsPath(chunks.slice(beginIndex, endIndex), isAbsolute && beginIndex == 0, directory) - } - - override def toFile: File = throw new UnsupportedOperationException("A GCS path cannot be converted to a File.") - - override def resolveSibling(other: Path): Path = { - val otherPath = other.asGcsPath - new NioGcsPath(getParent.asGcsPath.chunks ++ otherPath.chunks, isAbsolute, otherPath.isDirectory) - } - - override def resolveSibling(other: String): Path = { - val otherPath = getSoftPath(other).asGcsPath - new NioGcsPath(getParent.asGcsPath.chunks ++ getSoftPath(other).asGcsPath.chunks, isAbsolute, otherPath.isDirectory) - } - - override def getFileSystem: FileSystem = gcsFileSystem - - override def getName(index: Int): Path = { - val directory = if (index == chunks.length - 1) isDirectory else true - new NioGcsPath(Array(chunks(index)), isAbsolute && index == 0, directory) - } - - override def getParent: Path = chunks match { - case values if values.isEmpty || values.length == 1 => null - case values => new NioGcsPath(values.init, isAbsolute, true) - } - - override def toAbsolutePath: Path = if (isAbsolute) this else { - throw new UnsupportedOperationException(s"Attached gcs filesystem has no root. 
path $toString can't be converted to an absolute path.") - } - - override def relativize(other: Path): Path = other match { - case gcs: NioGcsPath => new NioGcsPath(gcs.chunks.diff(this.chunks), false, gcs.isDirectory) - case _ => throw new IllegalArgumentException(s"$other is not a GCS path.") - } - - override def getNameCount: Int = chunks.length - - override def toUri: URI = new URI(GcsFileSystem.Scheme, bucket, s"/$objectName", null) - - override def compareTo(other: Path): Int = throw new NotImplementedError() - - override def register(watcher: WatchService, events: Array[Kind[_]], modifiers: Modifier*): WatchKey = throw new UnsupportedOperationException() - - override def register(watcher: WatchService, events: Kind[_]*): WatchKey = throw new UnsupportedOperationException() - - override def getFileName: Path = chunks match { - case values if values.isEmpty => null - case _ => new NioGcsPath(Array(chunks.last), isAbsolute && chunks.length == 1, isDirectory) - } - - override def getRoot: Path = new NioGcsPath(Array(bucket), true, true) - - override def iterator(): util.Iterator[Path] = { - if (chunks.isEmpty) chunks.map(_.asInstanceOf[Path]).iterator.asJava else { - val init = chunks.init map { elt => new NioGcsPath(Array(elt), false, true).asInstanceOf[Path] } - val fullIterator = init :+ new NioGcsPath(Array(chunks.last), false, isDirectory).asInstanceOf[Path] - fullIterator.iterator.asJava - } - } - - override def normalize(): Path = if (isAbsolute) this else throw new UnsupportedOperationException("Cannot normalize a relative GCS path.") - - override def endsWith(other: Path): Boolean = { - other match { - case rel: NioGcsPath if !isAbsolute && rel.isAbsolute => false - case _: NioGcsPath => chunks.endsWith(other.asGcsPath.chunks) - case _ => false - } - } - - override def endsWith(other: String): Boolean = { - Try(getSoftPath(other)) map { - case rel: NioGcsPath if !isAbsolute && rel.isAbsolute => false - case path@(_: NioGcsPath) => 
chunks.endsWith(path.asGcsPath.chunks) - case _ => false - } getOrElse false - } - - override def resolve(other: Path): Path = { - if (other.isAbsolute) other - else { - val otherGcs = other.asGcsPath - new NioGcsPath(chunks ++ otherGcs.chunks, isAbsolute, otherGcs.isDirectory) - } - } - - override def resolve(other: String): Path = { - val otherPath = getSoftPath(other).asGcsPath - if (otherPath.isAbsolute) otherPath - else new NioGcsPath(chunks ++ otherPath.asGcsPath.chunks, isAbsolute, otherPath.isDirectory) - } - - override def toRealPath(options: LinkOption*): Path = this - - override def startsWith(other: Path): Boolean = { - other match { - case rel: NioGcsPath if !isAbsolute && rel.isAbsolute => false - case _: NioGcsPath => chunks.startsWith(other.asGcsPath.chunks) - case _ => false - } - } - - override def startsWith(other: String): Boolean = { - Try(getSoftPath(other)) map { - case rel: NioGcsPath if !isAbsolute && rel.isAbsolute => false - case path@(_: NioGcsPath) => chunks.startsWith(path.asGcsPath.chunks) - case _ => false - } getOrElse false - } - - override def toString: String = { - if (absolute) s"$Protocol$fullPath" - else fullPath - } - - override def isAbsolute: Boolean = absolute -} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala new file mode 100644 index 000000000..26d18833d --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/GoogleAuthMode.scala @@ -0,0 +1,187 @@ +package cromwell.filesystems.gcs.auth + +import java.io.{FileNotFoundException, InputStreamReader} +import java.nio.file.Paths + +import better.files._ +import com.google.api.client.auth.oauth2.Credential +import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp +import com.google.api.client.googleapis.auth.oauth2.{GoogleAuthorizationCodeFlow, GoogleClientSecrets, GoogleCredential} +import 
com.google.api.client.googleapis.extensions.java6.auth.oauth2.GooglePromptReceiver +import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport +import com.google.api.client.googleapis.testing.auth.oauth2.MockGoogleCredential +import com.google.api.client.json.jackson2.JacksonFactory +import com.google.api.client.util.store.FileDataStoreFactory +import com.google.api.services.storage.StorageScopes +import com.google.auth.oauth2.{ClientId, ServiceAccountCredentials} +import com.google.cloud.AuthCredentials +import cromwell.core.WorkflowOptions +import cromwell.filesystems.gcs.auth.GoogleAuthMode._ +import org.slf4j.LoggerFactory + +import scala.collection.JavaConverters._ +import scala.util.{Failure, Success, Try} + +object GoogleAuthMode { + + lazy val jsonFactory = JacksonFactory.getDefaultInstance + lazy val httpTransport = GoogleNetHttpTransport.newTrustedTransport + + val RefreshTokenOptionKey = "refresh_token" + val GcsScopes = List( + StorageScopes.DEVSTORAGE_FULL_CONTROL, + StorageScopes.DEVSTORAGE_READ_WRITE + ).asJava + + def checkReadable(file: File) = { + if (!file.isReadable) throw new FileNotFoundException(s"File $file does not exist or is not readable") + } + + case object NoAuthMode extends GoogleAuthMode { + override def name = "no_auth" + + override def authCredentials(options: WorkflowOptions): AuthCredentials = AuthCredentials.noAuth() + override def credential(options: WorkflowOptions): Credential = new MockGoogleCredential.Builder().build() + } +} + + +sealed trait GoogleAuthMode { + protected lazy val log = LoggerFactory.getLogger(getClass.getSimpleName) + + /** + * Validate the auth mode against provided options + */ + def validate(options: WorkflowOptions): Unit = {()} + + def name: String + // Create an AuthCredentials object from the google-cloud library (https://github.com/GoogleCloudPlatform/google-cloud-java using https://github.com/google/google-auth-library-java under the hood) + def authCredentials(options: 
WorkflowOptions): AuthCredentials + // Create a Credential object from the google.api.client.auth library (https://github.com/google/google-api-java-client) + def credential(options: WorkflowOptions): Credential + + def requiresAuthFile: Boolean = false + + protected def validateAuthCredentials(authCredentials: AuthCredentials, scopes: java.util.Collection[String]): AuthCredentials = validate(authCredentials, authCredentials.credentials().createScoped(scopes).refresh) + + protected def validateCredential(credential: Credential) = validate(credential, credential.refreshToken) + + private def validate[T](credential: T, validation: () => Any): T = { + Try(validation()) match { + case Failure(ex) => throw new RuntimeException(s"Google credentials are invalid: ${ex.getMessage}") + case Success(_) => credential + } + } +} + +final case class ServiceAccountMode(override val name: String, + accountId: String, + pemPath: String, + scopes: java.util.List[String]) extends GoogleAuthMode { + private val pemFile = File(pemPath) + checkReadable(pemFile) + + private lazy val _authCredentials: AuthCredentials = { + val saCredentials = ServiceAccountCredentials.fromPkcs8(accountId, accountId, pemFile.contentAsString, null, scopes) + validateAuthCredentials(AuthCredentials.createFor(saCredentials.getClientId, saCredentials.getPrivateKey), scopes) + } + + private lazy val _credential: Credential = { + validateCredential( + new GoogleCredential.Builder().setTransport(httpTransport) + .setJsonFactory(jsonFactory) + .setServiceAccountId(accountId) + .setServiceAccountScopes(scopes) + .setServiceAccountPrivateKeyFromPemFile(pemFile.toJava) + .build() + ) + } + + override def authCredentials(options: WorkflowOptions) = _authCredentials + + override def credential(options: WorkflowOptions): Credential = _credential +} + +final case class UserMode(override val name: String, + user: String, + val secretsPath: String, + datastoreDir: String, + scopes: java.util.List[String]) extends 
GoogleAuthMode { + + private lazy val secrets = { + val secretsFile = File(secretsPath) + checkReadable(secretsFile) + + val secretStream = new InputStreamReader(secretsFile.newInputStream) + + GoogleClientSecrets.load(jsonFactory, secretStream) + } + + private lazy val _credential: Credential = { + val dataStore = Paths.get(datastoreDir).toAbsolutePath + val dataStoreFactory = new FileDataStoreFactory(dataStore.toFile) + val flow = new GoogleAuthorizationCodeFlow.Builder(httpTransport, jsonFactory, secrets, scopes).setDataStoreFactory(dataStoreFactory).build + validateCredential(new AuthorizationCodeInstalledApp(flow, new GooglePromptReceiver).authorize(user)) + } + + private lazy val _authCredentials: AuthCredentials = { + new RefreshableOAuth2Credentials(_credential.getRefreshToken, new ClientId(secrets.getDetails.getClientId, secrets.getDetails.getClientSecret)) + } + + override def credential(options: WorkflowOptions) = _credential + + override def authCredentials(options: WorkflowOptions) = _authCredentials +} + +private object ApplicationDefault { + private [auth] lazy val _AuthCredentials = AuthCredentials.createApplicationDefaults() + private [auth] lazy val _Credential: Credential = GoogleCredential.getApplicationDefault() +} + +final case class ApplicationDefaultMode(name: String) extends GoogleAuthMode { + override def authCredentials(options: WorkflowOptions) = ApplicationDefault._AuthCredentials + override def credential(options: WorkflowOptions) = ApplicationDefault._Credential +} + +final case class RefreshTokenMode(name: String, + clientId: String, + clientSecret: String, + scopes: java.util.List[String]) extends GoogleAuthMode with ClientSecrets { + import GoogleAuthMode._ + override def requiresAuthFile = true + + private def extractRefreshToken(options: WorkflowOptions): String = { + options.get(RefreshTokenOptionKey) getOrElse { + throw new IllegalArgumentException(s"Missing parameters in workflow options: $RefreshTokenOptionKey") + } + } + + 
override def validate(options: WorkflowOptions) = { + extractRefreshToken(options) + + () + } + + override def authCredentials(options: WorkflowOptions): AuthCredentials = { + val refreshToken = extractRefreshToken(options) + validateAuthCredentials(new RefreshableOAuth2Credentials(refreshToken, new ClientId(clientId, clientSecret)), scopes) + } + + override def credential(options: WorkflowOptions): Credential = { + val refreshToken = extractRefreshToken(options) + validateCredential( + new GoogleCredential.Builder().setTransport(httpTransport) + .setJsonFactory(jsonFactory) + .setClientSecrets(clientId, clientSecret) + .build() + .setRefreshToken(refreshToken) + ) + } +} + +trait ClientSecrets { + val clientId: String + val clientSecret: String +} + +final case class SimpleClientSecrets(clientId: String, clientSecret: String) extends ClientSecrets diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/RefreshableOAuth2Credentials.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/RefreshableOAuth2Credentials.scala new file mode 100644 index 000000000..ae1e32ef5 --- /dev/null +++ b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/auth/RefreshableOAuth2Credentials.scala @@ -0,0 +1,31 @@ +package cromwell.filesystems.gcs.auth + +import java.io.Serializable +import java.util.Objects + +import com.google.auth.oauth2.{ClientId, GoogleCredentials, UserCredentials} +import com.google.cloud.{AuthCredentials, RestorableState} + +class RefreshableOAuth2Credentials(refreshToken: String, clientId: ClientId) extends AuthCredentials { + private val _credentials: GoogleCredentials = new UserCredentials(clientId.getClientId, clientId.getClientSecret, refreshToken) + + private class RefreshableOAuth2CredentialsState(val refreshToken: String, val clientId: ClientId) extends RestorableState[AuthCredentials] with Serializable { + + override def restore: AuthCredentials = new RefreshableOAuth2Credentials(refreshToken, clientId) + + override 
def hashCode: Int = Objects.hash(refreshToken, clientId.getClientId, clientId.getClientSecret) + + override def equals(obj: Any): Boolean = { + obj.isInstanceOf[RefreshableOAuth2CredentialsState] && { + val other = obj.asInstanceOf[RefreshableOAuth2CredentialsState] + Objects.equals(refreshToken, other.refreshToken) && + Objects.equals(clientId.getClientId, other.clientId.getClientId) && + Objects.equals(clientId.getClientSecret, other.clientId.getClientSecret) + } + } + } + + override def credentials: GoogleCredentials = _credentials + + def capture: RestorableState[AuthCredentials] = new RefreshableOAuth2CredentialsState(refreshToken, clientId) +} diff --git a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/package.scala b/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/package.scala deleted file mode 100644 index 0ec2c0316..000000000 --- a/filesystems/gcs/src/main/scala/cromwell/filesystems/gcs/package.scala +++ /dev/null @@ -1,6 +0,0 @@ -package cromwell.filesystems - - -package object gcs { - type RefreshToken = String -} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsIntegrationTest.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsIntegrationTest.scala deleted file mode 100644 index 3c6a28734..000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsIntegrationTest.scala +++ /dev/null @@ -1,5 +0,0 @@ -package cromwell.filesystems.gcs - -import org.scalatest.Tag - -object GcsIntegrationTest extends Tag("GcsIntegrationTest") diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala new file mode 100644 index 000000000..598cb5461 --- /dev/null +++ b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GcsPathBuilderSpec.scala @@ -0,0 +1,31 @@ +package cromwell.filesystems.gcs + +import com.google.cloud.RetryParams +import 
com.google.cloud.storage.contrib.nio.CloudStorageConfiguration +import cromwell.core.path.CustomRetryParams +import cromwell.core.path.proxy.RetryableFileSystemProviderProxy +import cromwell.core.{TestKitSuite, WorkflowOptions} +import cromwell.filesystems.gcs.auth.GoogleAuthMode +import org.scalatest.{FlatSpecLike, Matchers} + +class GcsPathBuilderSpec extends TestKitSuite with FlatSpecLike with Matchers { + + implicit val as = system + + behavior of "GcsPathBuilderSpec" + + it should "create a path with a retryable provider" in { + val retryablePathBuilder = new RetryableGcsPathBuilder( + GoogleAuthMode.NoAuthMode, + RetryParams.defaultInstance(), + CustomRetryParams.Default, + CloudStorageConfiguration.DEFAULT, + WorkflowOptions.empty + ) + + val path = retryablePathBuilder.build("gs://bucket/object") + path.isSuccess shouldBe true + path.get.getFileSystem.provider() shouldBe a[RetryableFileSystemProviderProxy[_]] + } + +} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleConfigurationSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleConfigurationSpec.scala index 3eeeaf568..cc1c9fd6c 100644 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleConfigurationSpec.scala +++ b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleConfigurationSpec.scala @@ -1,6 +1,8 @@ package cromwell.filesystems.gcs +import better.files.File import com.typesafe.config.{ConfigException, ConfigFactory} +import cromwell.filesystems.gcs.auth.{ApplicationDefaultMode, RefreshTokenMode, ServiceAccountMode, UserMode} import lenthall.config.ConfigValidationException import org.scalatest.{FlatSpec, Matchers} @@ -10,8 +12,10 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { behavior of "GoogleConfiguration" it should "parse all manner of well-formed auths" in { + val mockFile = File.newTemporaryFile() + val righteousGoogleConfig = - """ + s""" |google { | application-name = "cromwell" | @@ -30,14 +34,14 
@@ class GoogleConfigurationSpec extends FlatSpec with Matchers { | name = "name-user" | scheme = "user_account" | user = "me" - | secrets-file = "/very/secret/file.txt" + | secrets-file = "${mockFile.pathAsString}" | data-store-dir = "/where/the/data/at" | }, | { | name = "name-service" | scheme = "service_account" | service-account-id = "my-google-account" - | pem-file = "/yonder/file.pem" + | pem-file = "${mockFile.pathAsString}" | } | ] |} @@ -61,13 +65,15 @@ class GoogleConfigurationSpec extends FlatSpec with Matchers { val user = (auths collectFirst { case a: UserMode => a }).get user.name shouldBe "name-user" - user.secretsFile shouldBe "/very/secret/file.txt" + user.secretsPath shouldBe mockFile.pathAsString user.datastoreDir shouldBe "/where/the/data/at" val service = (auths collectFirst { case a: ServiceAccountMode => a }).get service.name shouldBe "name-service" service.accountId shouldBe "my-google-account" - service.pemPath shouldBe "/yonder/file.pem" + service.pemPath shouldBe mockFile.pathAsString + + mockFile.delete(true) } diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleCredentialFactorySpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleCredentialFactorySpec.scala deleted file mode 100644 index 541dc75eb..000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/GoogleCredentialFactorySpec.scala +++ /dev/null @@ -1,158 +0,0 @@ -package cromwell.filesystems.gcs - -import java.nio.file.Paths - -import com.google.api.client.auth.oauth2.Credential -import com.google.api.client.googleapis.auth.oauth2.GoogleCredential -import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport -import com.google.api.client.json.jackson2.JacksonFactory -import com.typesafe.config.ConfigFactory -import cromwell.filesystems.gcs.GoogleAuthMode.EnhancedCredentials -import org.scalatest.{FlatSpec, Matchers} - -import scala.util.Try - -class GoogleCredentialFactorySpec extends FlatSpec with Matchers { 
- import GoogleCredentialFactorySpec._ - - behavior of "GoogleCredentialFactory" - - it should "refresh a token using user credentials" taggedAs GcsIntegrationTest in { - val credential = UserMode( - name = "user", - user = secretConf("user"), - secretsFile = secretConf("secrets-file"), - datastoreDir = secretConf("data-store-dir")).credential(emptyOptions) - - val firstCredentialTry: Try[Credential] = credential.freshCredential - assert(firstCredentialTry.isSuccess) - val firstCredential = firstCredentialTry.get - firstCredential.getAccessToken shouldNot be(empty) - - firstCredential.setExpiresInSeconds(59L) - - val secondCredentialTry: Try[Credential] = firstCredential.freshCredential - assert(secondCredentialTry.isSuccess) - - val secondCredential = secondCredentialTry.get - secondCredential.getAccessToken shouldNot be(empty) - secondCredential.getExpiresInSeconds shouldNot be(null) - secondCredential.getExpiresInSeconds.longValue should be > 60L - } - - it should "refresh a token using a service account" taggedAs GcsIntegrationTest in { - val credential = ServiceAccountMode( - name = "service", - accountId = secretConf("service-account-id"), - pemPath = secretConf("pem-file")).credential(emptyOptions) - - val firstCredentialTry: Try[Credential] = credential.freshCredential - assert(firstCredentialTry.isSuccess) - val firstCredential = firstCredentialTry.get - firstCredential.getAccessToken shouldNot be(empty) - - firstCredential.setExpiresInSeconds(59L) - - val secondCredentialTry: Try[Credential] = firstCredential.freshCredential - assert(secondCredentialTry.isSuccess) - - val secondCredential = secondCredentialTry.get - secondCredential.getAccessToken shouldNot be(empty) - secondCredential.getExpiresInSeconds shouldNot be(null) - secondCredential.getExpiresInSeconds.longValue should be > 60L - } - - it should "refresh a token using a refresh token" taggedAs GcsIntegrationTest in { - val opts = GoogleOptionsMap(Map("refresh_token" -> 
secretConf("refresh_token"))) - - val credential = RefreshTokenMode(name = "refresh", - clientId = secretConf("client-id"), - clientSecret = secretConf("client-secret")).credential(opts) - - val firstUserCredentialsTry = credential.freshCredential - - assert(firstUserCredentialsTry.isSuccess) - val firstUserCredentials = firstUserCredentialsTry.get - - val firstRefreshedUserCredentialsTry: Try[Credential] = firstUserCredentials.freshCredential - assert(firstRefreshedUserCredentialsTry.isSuccess) - val firstRefreshedUserCredentials = firstRefreshedUserCredentialsTry.get - firstRefreshedUserCredentials.getAccessToken shouldNot be(empty) - - firstRefreshedUserCredentials.setExpiresInSeconds(59L) - - val secondRefreshedUserCredentialsTry: Try[Credential] = firstRefreshedUserCredentials.freshCredential - assert(secondRefreshedUserCredentialsTry.isSuccess) - - val secondRefreshedUserCredentials = secondRefreshedUserCredentialsTry.get - secondRefreshedUserCredentials.getAccessToken shouldNot be(empty) - secondRefreshedUserCredentials.getExpiresInSeconds shouldNot be(null) - secondRefreshedUserCredentials.getExpiresInSeconds.longValue should be > 60L - } - - it should "not refresh an empty token" in { - - val wrongCredentials = new GoogleCredential.Builder() - .setTransport(GoogleNetHttpTransport.newTrustedTransport) - .setJsonFactory(JacksonFactory.getDefaultInstance) - .setClientSecrets("fakeId", "fakeSecret") - .build() - - val exception = wrongCredentials.freshCredential.failed.get - - exception.getMessage should be("Unable to refresh token") - } - - it should "refresh a token using application default credentials" taggedAs GcsIntegrationTest in { - val credential = applicationDefaultCredential - - val firstCredentialTry: Try[Credential] = credential.freshCredential - assert(firstCredentialTry.isSuccess) - val firstCredential = firstCredentialTry.get - firstCredential.getAccessToken shouldNot be(empty) - - firstCredential.setExpiresInSeconds(59L) - - val 
secondCredentialTry: Try[Credential] = firstCredential.freshCredential - assert(secondCredentialTry.isSuccess) - - val secondCredential = secondCredentialTry.get - secondCredential.getAccessToken shouldNot be(empty) - secondCredential.getExpiresInSeconds shouldNot be(null) - secondCredential.getExpiresInSeconds.longValue should be > 60L - } -} - -object GoogleCredentialFactorySpec { - /* - - To run this integration spec, your cromwell-credentials.conf file should have the following keys for the listed tests: - - // For testing UserMode - user = "" - secrets-file = "" - data-store-dir = "" - - // For testing ServiceAccountMode - service-account-id = "" - pem-file = "" - - // For testing RefreshTokenMode - client-id = "" - client-secret = "" - refresh_token = "" - - */ - - private lazy val credentialsConfig = ConfigFactory.parseFile(Paths.get("cromwell-credentials.conf").toFile) - - private def secretConf(path: String) = credentialsConfig.getString(path) - - private val emptyOptions = GoogleOptionsMap(Map.empty) - - def applicationDefaultCredential = ApplicationDefaultMode(name = "default").credential(emptyOptions) -} - -case class GoogleOptionsMap(map: Map[String, String]) extends GoogleAuthMode.GoogleAuthOptions { - override def get(key: String): Try[String] = Try { map(key) } -} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/MockGcsFileSystemBuilder.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/MockGcsFileSystemBuilder.scala deleted file mode 100644 index af569d7f0..000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/MockGcsFileSystemBuilder.scala +++ /dev/null @@ -1,9 +0,0 @@ -package cromwell.filesystems.gcs - -import scala.util.Failure - -object MockGcsFileSystemBuilder { - val mockGcsFileSystem = new GcsFileSystemProvider( - Failure(new Exception("No Storage object available")), - scala.concurrent.ExecutionContext.global).defaultFileSystem -} diff --git 
a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/NioGcsPathSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/NioGcsPathSpec.scala deleted file mode 100644 index ccb35efa6..000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/NioGcsPathSpec.scala +++ /dev/null @@ -1,291 +0,0 @@ -package cromwell.filesystems.gcs - -import java.nio.file.Path - -import org.scalatest.mockito.MockitoSugar -import org.scalatest.prop.TableDrivenPropertyChecks._ -import org.scalatest.prop.Tables.Table -import org.scalatest.{FlatSpec, Matchers} - -class NioGcsPathSpec extends FlatSpec with Matchers with MockitoSugar { - - behavior of "NioGcsPath" - - implicit val GCSFs = MockGcsFileSystemBuilder.mockGcsFileSystem - - it should "implement toString" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - absPath1.toString shouldBe "gs://absolute/path/to/somewhere" - relPath1.toString shouldBe "some/relative/path" - } - - it should "implement subpath" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val absSub1 = absPath1.subpath(0, 2) - absSub1.isAbsolute shouldBe true - absSub1.toString shouldBe "gs://absolute/path" - - val absSub2 = absPath1.subpath(1, 2) - absSub2.isAbsolute shouldBe false - absSub2.toString shouldBe "path" - - val relSub1 = relPath1.subpath(0, 2) - relSub1.isAbsolute shouldBe false - relSub1.toString shouldBe "some/relative" - } - - it should "implement resolveSibling" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - val relPath2 = new NioGcsPath(Array("another", "relative", "resource", "path"), false, true) - - val absSibling = 
absPath1.resolveSibling("somewhere else") - absSibling.isAbsolute shouldBe true - absSibling.toString shouldBe "gs://absolute/path/to/somewhere else" - - val absSiblingPath = absPath1.resolveSibling(relPath1) - absSiblingPath.isAbsolute shouldBe true - absSiblingPath.toString shouldBe "gs://absolute/path/to/some/relative/path" - - val absRel = relPath1.resolveSibling("other path") - absRel.isAbsolute shouldBe false - absRel.toString shouldBe "some/relative/other path" - - val absRelPath = relPath1.resolveSibling(relPath2) - absRelPath.isAbsolute shouldBe false - absRelPath.toString shouldBe "some/relative/another/relative/resource/path" - } - - it should "implement resolve" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val absPath2 = new NioGcsPath(Array("absolute", "location"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - val relPath2 = new NioGcsPath(Array("another", "relative", "resource", "path"), false, true) - - val absToRel = absPath1.resolve(relPath1) - absToRel.isAbsolute shouldBe true - absToRel.toString shouldBe "gs://absolute/path/to/somewhere/some/relative/path" - - val absToAbs = absPath1.resolve(absPath2) - absToAbs.isAbsolute shouldBe true - absToAbs.toString shouldBe "gs://absolute/location" - - val relToAbs = relPath1.resolve(absPath1) - relToAbs.isAbsolute shouldBe true - relToAbs.toString shouldBe "gs://absolute/path/to/somewhere" - - val relToRel = relPath1.resolve(relPath2) - relToRel.isAbsolute shouldBe false - relToRel.toString shouldBe "some/relative/path/another/relative/resource/path" - } - - it should "implement getName" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val nameAbs1 = absPath1.getName(0) - nameAbs1.isAbsolute shouldBe true - nameAbs1.toString shouldBe "gs://absolute" - - val nameAbs2 = 
absPath1.getName(1) - nameAbs2.isAbsolute shouldBe false - nameAbs2.toString shouldBe "path" - - val nameRel1 = relPath1.getName(0) - nameRel1.isAbsolute shouldBe false - nameRel1.toString shouldBe "some" - - val nameRel2 = relPath1.getName(1) - nameRel2.isAbsolute shouldBe false - nameRel2.toString shouldBe "relative" - } - - it should "implement getParent" in { - val empty = new NioGcsPath(Array.empty[String], true, true) - val singleton = new NioGcsPath(Array("singleton"), true, true) - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val parentAbs1 = absPath1.getParent - parentAbs1.isAbsolute shouldBe true - parentAbs1.toString shouldBe "gs://absolute/path/to" - - empty.getParent shouldBe null - singleton.getParent shouldBe null - - val nameRel1 = relPath1.getParent - nameRel1.isAbsolute shouldBe false - nameRel1.toString shouldBe "some/relative" - } - - it should "implement toAbsolutePath" in { - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val abs = absPath1.toAbsolutePath - abs.isAbsolute shouldBe true - abs.toString shouldBe "gs://absolute/path/to/somewhere" - - an[Exception] shouldBe thrownBy(relPath1.toAbsolutePath) - } - - it should "implement getNameCount" in { - val empty = new NioGcsPath(Array.empty[String], true, true) - val singleton = new NioGcsPath(Array("singleton"), true, true) - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - absPath1.getNameCount shouldBe 4 - relPath1.getNameCount shouldBe 3 - empty.getNameCount shouldBe 0 - singleton.getNameCount shouldBe 1 - } - - it should "implement getFileName" in { - val empty = new NioGcsPath(Array.empty[String], true, true) - val 
singletonAbs = new NioGcsPath(Array("singleton"), true, true) - val singletonRel = new NioGcsPath(Array("singleton"), false, true) - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - val emptyFileName = empty.getFileName - emptyFileName shouldBe null - - val singletonAbsFileName = singletonAbs.getFileName - singletonAbsFileName.isAbsolute shouldBe true - singletonAbsFileName.toString shouldBe "gs://singleton" - - val singletonRelFileName = singletonRel.getFileName - singletonRelFileName.isAbsolute shouldBe false - singletonRelFileName.toString shouldBe "singleton" - - val relFileName = relPath1.getFileName - relFileName.isAbsolute shouldBe false - relFileName.toString shouldBe "path" - - val absFileName = absPath1.getFileName - absFileName.isAbsolute shouldBe false - absFileName.toString shouldBe "somewhere" - } - - it should "implement getIterator" in { - val empty = new NioGcsPath(Array.empty[String], true, true) - val singletonAbs = new NioGcsPath(Array("singleton"), true, true) - val singletonRel = new NioGcsPath(Array("singleton"), false, true) - val absPath1 = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val relPath1 = new NioGcsPath(Array("some", "relative", "path"), false, true) - - empty.iterator().hasNext shouldBe false - - val singletonAbsIterator = singletonAbs.iterator() - val nextAbsSingleton: Path = singletonAbsIterator.next() - nextAbsSingleton.isAbsolute shouldBe false - nextAbsSingleton.toString shouldBe "singleton" - singletonAbsIterator.hasNext shouldBe false - - val singletonRelIterator = singletonRel.iterator() - val nextRelSingleton: Path = singletonRelIterator.next() - nextRelSingleton.isAbsolute shouldBe false - nextRelSingleton.toString shouldBe "singleton" - singletonRelIterator.hasNext shouldBe false - - val relIterator = relPath1.iterator() - val nextRel: Path = relIterator.next() - 
nextRel.isAbsolute shouldBe false - nextRel.toString shouldBe "some" - relIterator.next().toString shouldBe "relative" - relIterator.next().toString shouldBe "path" - relIterator.hasNext shouldBe false - - val absIterator = absPath1.iterator() - val absRel: Path = absIterator.next() - absRel.isAbsolute shouldBe false - absRel.toString shouldBe "absolute" - absIterator.next().toString shouldBe "path" - absIterator.next().toString shouldBe "to" - absIterator.next().toString shouldBe "somewhere" - absIterator.hasNext shouldBe false - } - - it should "implement startsWith" in { - val empty = new NioGcsPath(Array.empty[String], false, true) - val singletonAbs = new NioGcsPath(Array("absolute"), true, true) - - val absPath = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val startsWithAbsPath = new NioGcsPath(Array("absolute", "path", "to"), true, true) - val doesntStartsWithAbsPath = new NioGcsPath(Array("absolute", "path", "to", "another", "place"), true, true) - val absPathStartingLikeRel = new NioGcsPath(Array("some", "relative", "path"), true, true) - - val relPath = new NioGcsPath(Array("some", "relative", "path"), false, true) - val startsWithRelPath = new NioGcsPath(Array("some", "relative"), false, true) - val doesntStartsWithRelPath = new NioGcsPath(Array("some", "relative", "other", "path"), false, true) - val relPathStartingLikeAbs = new NioGcsPath(Array("absolute", "path", "to"), false, true) - - val paths = Table( - ("path1", "path2", "result"), - (empty, empty, true), - (empty, absPath, false), - (singletonAbs, singletonAbs, true), - (absPath, startsWithAbsPath, true), - (absPath, doesntStartsWithAbsPath, false), - (absPath, relPathStartingLikeAbs, true), - (absPath, relPath, false), - (relPath, startsWithRelPath, true), - (relPath, doesntStartsWithRelPath, false), - (relPath, absPathStartingLikeRel, false), - (relPath, absPath, false) - ) - - forAll(paths) { (p1, p2, res) => - val startsWith: Boolean = p1.startsWith(p2) - 
startsWith shouldBe res - val startsWith1: Boolean = p1.startsWith(p2.toString) - startsWith1 shouldBe res - } - } - - it should "implement endsWith" in { - val empty = new NioGcsPath(Array.empty[String], false, true) - val singletonAbs = new NioGcsPath(Array("absolute"), true, true) - - val absPath = new NioGcsPath(Array("absolute", "path", "to", "somewhere"), true, true) - val doesntEndWithAbsPath = new NioGcsPath(Array("absolute", "path", "to", "another", "place"), true, true) - val absPathEndingLikeRel = new NioGcsPath(Array("relative", "path"), true, true) - - val relPath = new NioGcsPath(Array("some", "relative", "path"), false, true) - val endsWithRelPath = new NioGcsPath(Array("relative", "path"), false, true) - val doesntStartsWithRelPath = new NioGcsPath(Array("relative", "other", "path"), false, true) - val relPathEndingLikeAbs = new NioGcsPath(Array("path", "to", "somewhere"), false, true) - - val paths = Table( - ("path1", "path2", "result"), - (empty, empty, true), - (empty, absPath, false), - (singletonAbs, singletonAbs, true), - (absPath, absPath, true), - (absPath, doesntEndWithAbsPath, false), - (absPath, relPathEndingLikeAbs, true), - (absPath, relPath, false), - (relPath, endsWithRelPath, true), - (relPath, doesntStartsWithRelPath, false), - (relPath, absPathEndingLikeRel, false), - (relPath, absPath, false) - ) - - forAll(paths) { (p1, p2, res) => - p1.endsWith(p2) shouldBe res - p1.endsWith(p2.toString) shouldBe res - } - } - - it should "implement toUri" in { - val file = new NioGcsPath(Array("some", "file"), true, false) - val uri = file.toUri - uri.toString shouldBe "gs://some/file" - } - -} diff --git a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/RefreshTokenModeSpec.scala b/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/RefreshTokenModeSpec.scala deleted file mode 100644 index f959dcad5..000000000 --- a/filesystems/gcs/src/test/scala/cromwell/filesystems/gcs/RefreshTokenModeSpec.scala +++ /dev/null @@ -1,26 +0,0 @@ 
-package cromwell.filesystems.gcs - -import org.scalatest.{FlatSpec, Matchers} - -class RefreshTokenModeSpec extends FlatSpec with Matchers { - - val refreshToken = RefreshTokenMode(name = "bar", clientId = "secret-id", clientSecret = "secret-secret") - - behavior of "RefreshTokenMode" - - it should "assert good workflow options" in { - val goodOptions = GoogleOptionsMap(Map("refresh_token" -> "token")) - refreshToken.assertWorkflowOptions(goodOptions) - } - - it should "fail to assert bad workflow options" in { - val badOptions = GoogleOptionsMap(Map("fresh_tokin" -> "broken")) - val noOptions = GoogleOptionsMap(Map.empty[String, String]) - - List(badOptions, noOptions).foreach { option => - the[IllegalArgumentException] thrownBy { - refreshToken.assertWorkflowOptions(option) - } should have message s"Missing parameters in workflow options: refresh_token" - } - } -} diff --git a/project/Dependencies.scala b/project/Dependencies.scala index a902e1b2a..ee130f095 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -2,7 +2,7 @@ import sbt._ object Dependencies { lazy val lenthallV = "0.19" - lazy val wdl4sV = "0.6" + lazy val wdl4sV = "0.7" lazy val sprayV = "1.3.3" /* spray-json is an independent project from the "spray suite" @@ -12,22 +12,37 @@ object Dependencies { - http://doc.akka.io/docs/akka/2.4/scala/http/common/json-support.html#akka-http-spray-json */ lazy val sprayJsonV = "1.3.2" - lazy val akkaV = "2.4.9" + lazy val akkaV = "2.4.12" lazy val slickV = "3.1.1" - lazy val googleClientApiV = "1.20.0" + lazy val googleClientApiV = "1.22.0" + lazy val googleGenomicsServicesApiV = "1.20.0" lazy val betterFilesV = "2.16.0" lazy val catsV = "0.7.2" // Internal collections of dependencies + private val catsDependencies = List( + "org.typelevel" %% "cats" % "0.7.2", + "com.github.benhutchison" %% "mouse" % "0.5" + ) map (_ + /* + Exclude test framework cats-laws and its transitive dependency scalacheck. 
+ If sbt detects scalacheck, it tries to run it. + Explicitly excluding the two problematic artifacts instead of including the three (or four?). + https://github.com/typelevel/cats/tree/v0.7.2#getting-started + Re "_2.11", see also: https://github.com/sbt/sbt/issues/1518 + */ + exclude("org.typelevel", "cats-laws_2.11") + exclude("org.typelevel", "cats-kernel-laws_2.11") + ) + private val baseDependencies = List( "org.broadinstitute" %% "lenthall" % lenthallV, - "org.typelevel" %% "cats" % catsV, - "com.github.benhutchison" %% "mouse" % "0.5", "com.iheart" %% "ficus" % "1.3.0", "org.scalatest" %% "scalatest" % "3.0.0" % Test, - "org.specs2" %% "specs2" % "3.7" % Test - ) + "org.pegdown" % "pegdown" % "1.6.0" % Test, + "org.specs2" %% "specs2-mock" % "3.8.5" % Test + ) ++ catsDependencies private val slf4jBindingDependencies = List( // http://logback.qos.ch/dependencies.html @@ -65,10 +80,10 @@ object Dependencies { ) private val googleCloudDependencies = List( - "com.google.gcloud" % "gcloud-java" % "0.0.9", - "com.google.oauth-client" % "google-oauth-client" % googleClientApiV, - "com.google.cloud.bigdataoss" % "gcsio" % "1.4.4", - "com.google.apis" % "google-api-services-genomics" % ("v1alpha2-rev14-" + googleClientApiV) + "com.google.apis" % "google-api-services-genomics" % ("v1alpha2-rev14-" + googleGenomicsServicesApiV), + "com.google.cloud" % "google-cloud-nio" % "0.3.0" + exclude("com.google.api.grpc", "grpc-google-common-protos") + exclude("com.google.cloud.datastore", "datastore-v1-protos") ) private val dbmsDependencies = List( @@ -86,15 +101,12 @@ object Dependencies { // Sub-project dependencies, added in addition to any dependencies inherited from .dependsOn(). 
- val gcsFileSystemDependencies = baseDependencies ++ googleApiClientDependencies ++ googleCloudDependencies + val gcsFileSystemDependencies = baseDependencies ++ googleApiClientDependencies ++ googleCloudDependencies ++ List ( + "com.github.pathikrit" %% "better-files" % betterFilesV + ) val databaseSqlDependencies = baseDependencies ++ slickDependencies ++ dbmsDependencies - val databaseMigrationDependencies = List( - "org.broadinstitute" %% "wdl4s" % wdl4sV, // Used in migration scripts - "com.github.pathikrit" %% "better-files" % betterFilesV % Test - ) ++ baseDependencies ++ liquibaseDependencies ++ dbmsDependencies - val coreDependencies = List( "com.typesafe.scala-logging" %% "scala-logging" % "3.4.0", "org.broadinstitute" %% "wdl4s" % wdl4sV, @@ -108,6 +120,10 @@ object Dependencies { // TODO: We're not using the "F" in slf4j. Core only supports logback, specifically the WorkflowLogger. slf4jBindingDependencies + val databaseMigrationDependencies = List( + "com.github.pathikrit" %% "better-files" % betterFilesV % Test + ) ++ liquibaseDependencies ++ dbmsDependencies + val htCondorBackendDependencies = List( "com.twitter" %% "chill" % "0.8.0", "org.mongodb" %% "casbah" % "3.0.0" @@ -118,12 +134,9 @@ object Dependencies { ) ++ sprayServerDependencies val engineDependencies = List( - "com.typesafe.scala-logging" %% "scala-logging" % "3.4.0", "org.webjars" % "swagger-ui" % "2.1.1", "commons-codec" % "commons-codec" % "1.10", "commons-io" % "commons-io" % "2.5", - "org.typelevel" %% "cats" % catsV, - "com.github.pathikrit" %% "better-files" % betterFilesV, "io.swagger" % "swagger-parser" % "1.0.22" % Test, "org.yaml" % "snakeyaml" % "1.17" % Test ) ++ sprayServerDependencies diff --git a/project/Merging.scala b/project/Merging.scala index 0591a09f9..2c941a76e 100644 --- a/project/Merging.scala +++ b/project/Merging.scala @@ -25,10 +25,15 @@ object Merging { MergeStrategy.filterDistinctLines case ("spring.schemas" :: Nil) | ("spring.handlers" :: Nil) => 
MergeStrategy.filterDistinctLines + case "io.netty.versions.properties" :: Nil => + MergeStrategy.first + case "maven" :: "com.google.guava" :: xs => + MergeStrategy.first case _ => MergeStrategy.deduplicate } case "asm-license.txt" | "overview.html" | "cobertura.properties" => MergeStrategy.discard + case _ => MergeStrategy.deduplicate } } \ No newline at end of file diff --git a/project/Settings.scala b/project/Settings.scala index 4a092ce68..f59d102e8 100644 --- a/project/Settings.scala +++ b/project/Settings.scala @@ -6,8 +6,8 @@ import Version._ import sbt.Keys._ import sbt._ import sbtassembly.AssemblyPlugin.autoImport._ -import sbtrelease.ReleasePlugin import sbtdocker.DockerPlugin.autoImport._ +import sbtrelease.ReleasePlugin object Settings { @@ -48,7 +48,13 @@ object Settings { "-Ywarn-numeric-widen", "-Ywarn-value-discard", "-Ywarn-unused", - "-Ywarn-unused-import" + "-Ywarn-unused-import", + "-Xfatal-warnings" + ) + + val docSettings = List( + // http://stackoverflow.com/questions/31488335/scaladoc-2-11-6-fails-on-throws-tag-with-unable-to-find-any-member-to-link#31497874 + "-no-link-warnings" ) lazy val assemblySettings = Seq( @@ -58,14 +64,17 @@ object Settings { logLevel in assembly := Level.Info, assemblyMergeStrategy in assembly := customMergeStrategy ) - + lazy val dockerSettings = Seq( imageNames in docker := Seq( - ImageName( - namespace = Option("broadinstitute"), - repository = name.value, - tag = Some(s"${version.value}") - ) + ImageName( + namespace = Option("broadinstitute"), + repository = name.value, + tag = Option(cromwellVersion)), + ImageName( + namespace = Option("broadinstitute"), + repository = name.value, + tag = Option(version.value)) ), dockerfile in docker := { // The assembly task generates a fat JAR file @@ -77,7 +86,7 @@ object Settings { expose(8000) add(artifact, artifactTargetPath) runRaw(s"ln -s $artifactTargetPath /app/cromwell.jar") - + // If you use the 'exec' form for an entry point, shell processing is not performed 
and // environment variable substitution does not occur. Thus we have to /bin/bash here // and pass along any subsequent command line arguments @@ -89,8 +98,7 @@ object Settings { cache = false, removeIntermediateContainers = BuildOptions.Remove.Always ) - ) - + ) val commonSettings = ReleasePlugin.projectSettings ++ testSettings ++ assemblySettings ++ dockerSettings ++ cromwellVersionWithGit ++ publishingSettings ++ List( @@ -98,6 +106,7 @@ object Settings { scalaVersion := "2.11.8", resolvers ++= commonResolvers, scalacOptions ++= compilerSettings, + scalacOptions in (Compile, doc) ++= docSettings, parallelExecution := false ) diff --git a/project/Testing.scala b/project/Testing.scala index 82df707d5..46ca8f155 100644 --- a/project/Testing.scala +++ b/project/Testing.scala @@ -10,23 +10,25 @@ object Testing { lazy val DbmsTest = config("dbms") extend Test lazy val DockerTestTag = "DockerTest" - lazy val UseDockerTaggedTests = Tests.Argument("-n", DockerTestTag) - lazy val DontUseDockerTaggedTests = Tests.Argument("-l", DockerTestTag) + lazy val UseDockerTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-n", DockerTestTag) + lazy val DontUseDockerTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-l", DockerTestTag) lazy val CromwellIntegrationTestTag = "CromwellIntegrationTest" - lazy val UseCromwellIntegrationTaggedTests = Tests.Argument("-n", CromwellIntegrationTestTag) - lazy val DontUseCromwellIntegrationTaggedTests = Tests.Argument("-l", CromwellIntegrationTestTag) + lazy val UseCromwellIntegrationTaggedTests = + Tests.Argument(TestFrameworks.ScalaTest, "-n", CromwellIntegrationTestTag) + lazy val DontUseCromwellIntegrationTaggedTests = + Tests.Argument(TestFrameworks.ScalaTest, "-l", CromwellIntegrationTestTag) lazy val GcsIntegrationTestTag = "GcsIntegrationTest" - lazy val UseGcsIntegrationTaggedTests = Tests.Argument("-n", GcsIntegrationTestTag) - lazy val DontUseGcsIntegrationTaggedTests = Tests.Argument("-l", GcsIntegrationTestTag) + lazy 
val UseGcsIntegrationTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-n", GcsIntegrationTestTag) + lazy val DontUseGcsIntegrationTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-l", GcsIntegrationTestTag) lazy val DbmsTestTag = "DbmsTest" - lazy val UseDbmsTaggedTests = Tests.Argument("-n", DbmsTestTag) - lazy val DontUseDbmsTaggedTests = Tests.Argument("-l", DbmsTestTag) + lazy val UseDbmsTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-n", DbmsTestTag) + lazy val DontUseDbmsTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-l", DbmsTestTag) lazy val PostMVPTag = "PostMVP" - lazy val DontUsePostMVPTaggedTests = Tests.Argument("-l", PostMVPTag) + lazy val DontUsePostMVPTaggedTests = Tests.Argument(TestFrameworks.ScalaTest, "-l", PostMVPTag) lazy val TestReportArgs = Tests.Argument(TestFrameworks.ScalaTest, "-oDSI", "-h", "target/test-reports") diff --git a/project/Version.scala b/project/Version.scala index e6de0a5cd..70adedb03 100644 --- a/project/Version.scala +++ b/project/Version.scala @@ -3,8 +3,8 @@ import sbt.Keys._ import sbt._ object Version { - // Upcoming release, or current if we're on the master branch - val cromwellVersion = "0.22" + // Upcoming release, or current if we're on a master / hotfix branch + val cromwellVersion = "23" // Adapted from SbtGit.versionWithGit def cromwellVersionWithGit: Seq[Setting[_]] = @@ -39,6 +39,7 @@ object Version { // The project isSnapshot string passed in via command line settings, if desired. 
val isSnapshot = sys.props.get("project.isSnapshot").forall(_.toBoolean) - if (isSnapshot) s"$version-SNAPSHOT" else version + // For now, obfuscate SNAPSHOTs from sbt's developers: https://github.com/sbt/sbt/issues/2687#issuecomment-236586241 + if (isSnapshot) s"$version-SNAP" else version } } diff --git a/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala b/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala index 5284a9a8e..bf1df98d9 100644 --- a/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala +++ b/services/src/main/scala/cromwell/services/metadata/CallMetadataKeys.scala @@ -17,4 +17,6 @@ object CallMetadataKeys { val BackendLogsPrefix = "backendLogs" val JobId = "jobId" val CallRoot = "callRoot" + val SubWorkflowId = "subWorkflowId" + val SubWorkflowMetadata = "subWorkflowMetadata" } diff --git a/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala b/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala index 374169d05..6542c2082 100644 --- a/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala +++ b/services/src/main/scala/cromwell/services/metadata/MetadataQuery.scala @@ -36,7 +36,7 @@ case object MetadataBoolean extends MetadataType { override val typeName = "bool object MetadataValue { def apply(value: Any) = { - value match { + Option(value).getOrElse("") match { case WdlInteger(i) => new MetadataValue(i.toString, MetadataInt) case WdlFloat(f) => new MetadataValue(f.toString, MetadataNumber) case WdlBoolean(b) => new MetadataValue(b.toString, MetadataBoolean) @@ -75,16 +75,17 @@ object MetadataQueryJobKey { case class MetadataQuery(workflowId: WorkflowId, jobKey: Option[MetadataQueryJobKey], key: Option[String], includeKeysOption: Option[NonEmptyList[String]], - excludeKeysOption: Option[NonEmptyList[String]]) + excludeKeysOption: Option[NonEmptyList[String]], + expandSubWorkflows: Boolean) object MetadataQuery { - def 
forWorkflow(workflowId: WorkflowId) = MetadataQuery(workflowId, None, None, None, None) + def forWorkflow(workflowId: WorkflowId) = MetadataQuery(workflowId, None, None, None, None, expandSubWorkflows = false) def forJob(workflowId: WorkflowId, jobKey: MetadataJobKey) = { - MetadataQuery(workflowId, Option(MetadataQueryJobKey.forMetadataJobKey(jobKey)), None, None, None) + MetadataQuery(workflowId, Option(MetadataQueryJobKey.forMetadataJobKey(jobKey)), None, None, None, expandSubWorkflows = false) } def forKey(key: MetadataKey) = { - MetadataQuery(key.workflowId, key.jobKey map MetadataQueryJobKey.forMetadataJobKey, Option(key.key), None, None) + MetadataQuery(key.workflowId, key.jobKey map MetadataQueryJobKey.forMetadataJobKey, Option(key.key), None, None, expandSubWorkflows = false) } } diff --git a/services/src/main/scala/cromwell/services/metadata/MetadataService.scala b/services/src/main/scala/cromwell/services/metadata/MetadataService.scala index 672f68580..e2a62c784 100644 --- a/services/src/main/scala/cromwell/services/metadata/MetadataService.scala +++ b/services/src/main/scala/cromwell/services/metadata/MetadataService.scala @@ -50,7 +50,8 @@ object MetadataService { case class PutMetadataAction(events: Iterable[MetadataEvent]) extends MetadataServiceAction case class GetSingleWorkflowMetadataAction(workflowId: WorkflowId, includeKeysOption: Option[NonEmptyList[String]], - excludeKeysOption: Option[NonEmptyList[String]]) + excludeKeysOption: Option[NonEmptyList[String]], + expandSubWorkflows: Boolean) extends ReadAction case class GetMetadataQueryAction(key: MetadataQuery) extends ReadAction case class GetStatus(workflowId: WorkflowId) extends ReadAction diff --git a/services/src/main/scala/cromwell/services/metadata/impl/MetadataDatabaseAccess.scala b/services/src/main/scala/cromwell/services/metadata/impl/MetadataDatabaseAccess.scala index 76476d3fd..5250107b6 100644 --- 
a/services/src/main/scala/cromwell/services/metadata/impl/MetadataDatabaseAccess.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/MetadataDatabaseAccess.scala @@ -107,19 +107,19 @@ trait MetadataDatabaseAccess { val uuid = query.workflowId.id.toString val futureMetadata: Future[Seq[MetadataEntry]] = query match { - case MetadataQuery(_, None, None, None, None) => databaseInterface.queryMetadataEntries(uuid) - case MetadataQuery(_, None, Some(key), None, None) => databaseInterface.queryMetadataEntries(uuid, key) - case MetadataQuery(_, Some(jobKey), None, None, None) => + case MetadataQuery(_, None, None, None, None, _) => databaseInterface.queryMetadataEntries(uuid) + case MetadataQuery(_, None, Some(key), None, None, _) => databaseInterface.queryMetadataEntries(uuid, key) + case MetadataQuery(_, Some(jobKey), None, None, None, _) => databaseInterface.queryMetadataEntries(uuid, jobKey.callFqn, jobKey.index, jobKey.attempt) - case MetadataQuery(_, Some(jobKey), Some(key), None, None) => + case MetadataQuery(_, Some(jobKey), Some(key), None, None, _) => databaseInterface.queryMetadataEntries(uuid, key, jobKey.callFqn, jobKey.index, jobKey.attempt) - case MetadataQuery(_, None, None, Some(includeKeys), None) => + case MetadataQuery(_, None, None, Some(includeKeys), None, _) => databaseInterface. queryMetadataEntriesLikeMetadataKeys(uuid, includeKeys.map(_ + "%"), requireEmptyJobKey = false) - case MetadataQuery(_, None, None, None, Some(excludeKeys)) => + case MetadataQuery(_, None, None, None, Some(excludeKeys), _) => databaseInterface. 
queryMetadataEntryNotLikeMetadataKeys(uuid, excludeKeys.map(_ + "%"), requireEmptyJobKey = false) - case MetadataQuery(_, None, None, Some(includeKeys), Some(excludeKeys)) => Future.failed( + case MetadataQuery(_, None, None, Some(includeKeys), Some(excludeKeys), _) => Future.failed( new IllegalArgumentException( s"Include/Exclude keys may not be mixed: include = $includeKeys, exclude = $excludeKeys")) case invalidQuery => Future.failed(new IllegalArgumentException( diff --git a/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala b/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala index ebdc500de..5308d69f2 100644 --- a/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala +++ b/services/src/main/scala/cromwell/services/metadata/impl/ReadMetadataActor.scala @@ -5,7 +5,7 @@ import cromwell.core.Dispatcher.ApiDispatcher import cromwell.core.{WorkflowId, WorkflowSubmitted} import cromwell.services.SingletonServicesStore import cromwell.services.metadata.MetadataService._ -import cromwell.services.metadata.{MetadataQuery, WorkflowQueryParameters} +import cromwell.services.metadata.{CallMetadataKeys, MetadataQuery, WorkflowQueryParameters} import scala.concurrent.Future import scala.util.{Failure, Success, Try} @@ -19,9 +19,12 @@ class ReadMetadataActor extends Actor with ActorLogging with MetadataDatabaseAcc implicit val ec = context.dispatcher def receive = { - case GetSingleWorkflowMetadataAction(workflowId, includeKeysOption, excludeKeysOption) => - queryAndRespond(MetadataQuery(workflowId, None, None, includeKeysOption, excludeKeysOption)) - case GetMetadataQueryAction(query@MetadataQuery(_, _, _, _, _)) => queryAndRespond(query) + case GetSingleWorkflowMetadataAction(workflowId, includeKeysOption, excludeKeysOption, expandSubWorkflows) => + val includeKeys = if (expandSubWorkflows) { + includeKeysOption map { _.::(CallMetadataKeys.SubWorkflowId) } + } else includeKeysOption + 
queryAndRespond(MetadataQuery(workflowId, None, None, includeKeys, excludeKeysOption, expandSubWorkflows)) + case GetMetadataQueryAction(query@MetadataQuery(_, _, _, _, _, _)) => queryAndRespond(query) case GetStatus(workflowId) => queryStatusAndRespond(workflowId) case GetLogs(workflowId) => queryLogsAndRespond(workflowId) case query: WorkflowQuery[_] => queryWorkflowsAndRespond(query.uri, query.parameters) diff --git a/services/src/test/scala/cromwell/services/ServicesStoreSpec.scala b/services/src/test/scala/cromwell/services/ServicesStoreSpec.scala index 291b41988..7558a3988 100644 --- a/services/src/test/scala/cromwell/services/ServicesStoreSpec.scala +++ b/services/src/test/scala/cromwell/services/ServicesStoreSpec.scala @@ -160,6 +160,7 @@ object ServicesStoreSpec { s""" |db.url = "jdbc:hsqldb:mem:$${uniqueSchema};shutdown=false;hsqldb.tx=mvcc" |db.driver = "org.hsqldb.jdbcDriver" + |db.connectionTimeout = 3000 |driver = "slick.driver.HsqldbDriver$$" |liquibase.updateSchema = false |""".stripMargin) diff --git a/src/bin/travis/afterSuccess.sh b/src/bin/travis/afterSuccess.sh new file mode 100755 index 000000000..e8bd24aae --- /dev/null +++ b/src/bin/travis/afterSuccess.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +set -e + +echo "BUILD_TYPE='$BUILD_TYPE'" +echo "TRAVIS_BRANCH='$TRAVIS_BRANCH'" +echo "TRAVIS_PULL_REQUEST='$TRAVIS_PULL_REQUEST'" + +if [ "$BUILD_TYPE" == "sbt" ] && [ "$TRAVIS_PULL_REQUEST" == "false" ]; then + + if [ "$TRAVIS_BRANCH" == "develop" ]; then + sbt 'set test in Test := {}' publish + + elif [[ "$TRAVIS_BRANCH" =~ ^[0-9\.]+_hotfix$ ]]; then + docker login -u="$DOCKER_USERNAME" -p="$DOCKER_PASSWORD" + sbt 'set test in Test := {}' -Dproject.isSnapshot=false dockerBuildAndPush + + fi + +fi diff --git a/src/bin/travis/publishSnapshot.sh b/src/bin/travis/publishSnapshot.sh deleted file mode 100755 index 9c18a97fd..000000000 --- a/src/bin/travis/publishSnapshot.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -set -e - -echo 
"BUILD_TYPE='$BUILD_TYPE'" -echo "TRAVIS_BRANCH='$TRAVIS_BRANCH'" -echo "TRAVIS_PULL_REQUEST='$TRAVIS_PULL_REQUEST'" - -if [ "$BUILD_TYPE" == "sbt" ] && [ "$TRAVIS_BRANCH" == "develop" ] && [ "$TRAVIS_PULL_REQUEST" == "false" ]; then - sbt 'set test in Test := {}' publish -fi diff --git a/src/bin/travis/resources/centaur.inputs b/src/bin/travis/resources/centaur.inputs index c378c0f71..544aa8524 100644 --- a/src/bin/travis/resources/centaur.inputs +++ b/src/bin/travis/resources/centaur.inputs @@ -1,7 +1,7 @@ { - "centaur.centaur.cromwell_jar":"gs://cloud-cromwell-dev/travis-centaur/CROMWELL_JAR", - "centaur.centaur.cromwell_branch":"BRANCH", - "centaur.centaur.conf":"gs://cloud-cromwell-dev/travis-centaur/multiBackend.conf", - "centaur.centaur.pem":"gs://cloud-cromwell-dev/travis-centaur/cromwell-account.pem", - "centaur.centaur.token": "gs://cloud-cromwell-dev/travis-centaur/token.txt" + "centaur_workflow.centaur.cromwell_jar":"gs://cloud-cromwell-dev/travis-centaur/CROMWELL_JAR", + "centaur_workflow.centaur.centaur_branch":"CENTAUR_BRANCH", + "centaur_workflow.centaur.conf":"gs://cloud-cromwell-dev/travis-centaur/multiBackend.conf", + "centaur_workflow.centaur.pem":"gs://cloud-cromwell-dev/travis-centaur/cromwell-account.pem", + "centaur_workflow.centaur.token": "gs://cloud-cromwell-dev/travis-centaur/token.txt" } diff --git a/src/bin/travis/resources/centaur.wdl b/src/bin/travis/resources/centaur.wdl index 62e13c98c..230b271aa 100644 --- a/src/bin/travis/resources/centaur.wdl +++ b/src/bin/travis/resources/centaur.wdl @@ -1,5 +1,5 @@ task centaur { - String cromwell_branch + String centaur_branch File conf File pem File cromwell_jar @@ -11,7 +11,9 @@ task centaur { export SBT_OPTS=-Dsbt.ivy.home=/cromwell_root/tmp/.ivy2 git clone https://github.com/broadinstitute/centaur.git cd centaur - ./test_cromwell.sh -j${cromwell_jar} -c${conf} -r/cromwell_root -t ${secret} -elocaldockertest + git checkout ${centaur_branch} + cd .. 
+ centaur/test_cromwell.sh -j${cromwell_jar} -c${conf} -r/cromwell_root -t ${secret} -elocaldockertest >>> output { @@ -26,6 +28,6 @@ task centaur { failOnStderr: false } } -workflow centaur { +workflow centaur_workflow { call centaur } diff --git a/src/bin/travis/resources/local_centaur.conf b/src/bin/travis/resources/local_centaur.conf index 68ba866bf..ef6df3975 100644 --- a/src/bin/travis/resources/local_centaur.conf +++ b/src/bin/travis/resources/local_centaur.conf @@ -16,3 +16,6 @@ spray.can { call-caching { enabled = true } + +backend.providers.Local.config.filesystems.local.localization = ["soft-link", "copy"] +backend.providers.Local.config.concurrent-job-limit = 20 diff --git a/src/bin/travis/test.sh b/src/bin/travis/test.sh index 8730db3af..9fd024c69 100755 --- a/src/bin/travis/test.sh +++ b/src/bin/travis/test.sh @@ -11,6 +11,8 @@ elif [ "$BUILD_TYPE" = "centaurLocal" ]; then "${SCRIPT_DIR}"/testCentaurLocal.sh elif [ "$BUILD_TYPE" = "sbt" ]; then "${SCRIPT_DIR}"/testSbt.sh +elif [ "$BUILD_TYPE" = "checkPublish" ]; then + "${SCRIPT_DIR}"/testCheckPublish.sh else echo "Unknown BUILD_TYPE: '$BUILD_TYPE'" exit 1 diff --git a/src/bin/travis/testCentaurJes.sh b/src/bin/travis/testCentaurJes.sh index 04813698d..96e5e8e8b 100755 --- a/src/bin/travis/testCentaurJes.sh +++ b/src/bin/travis/testCentaurJes.sh @@ -54,7 +54,7 @@ echo "RUNNING TRAVIS CENTAUR" sbt assembly # Update the inputs file with stuff specific to this run -sed -i "s/BRANCH/${TRAVIS_BRANCH}/g" src/bin/travis/resources/centaur.inputs +sed -i "s/CENTAUR_BRANCH/${CENTAUR_BRANCH}/g" src/bin/travis/resources/centaur.inputs CROMWELL_JAR=cromwell_${TRAVIS_BUILD_ID}.jar sed -i "s/CROMWELL_JAR/${CROMWELL_JAR}/g" src/bin/travis/resources/centaur.inputs @@ -69,9 +69,9 @@ EXIT_CODE="${PIPESTATUS[0]}" export WORKFLOW_ID=`grep "SingleWorkflowRunnerActor: Workflow submitted " log.txt | perl -pe 's/\e\[?.*?[\@-~]//g' | cut -f7 -d" "` # Grab the Centaur log from GCS and cat it so we see it in the main travis log. 
-export CENTAUR_LOG_PATH="gs://cloud-cromwell-dev/cromwell_execution/travis/centaur/${WORKFLOW_ID}/call-centaur//cromwell_root/logs/centaur.log" +export CENTAUR_LOG_PATH="gs://cloud-cromwell-dev/cromwell_execution/travis/centaur_workflow/${WORKFLOW_ID}/call-centaur/cromwell_root/logs/centaur.log" gsutil cp ${CENTAUR_LOG_PATH} centaur.log cat centaur.log -echo "More logs for this run are available at https://console.cloud.google.com/storage/browser/cloud-cromwell-dev/cromwell_execution/travis/centaur/${WORKFLOW_ID}/call-centaur/" +echo "More logs for this run are available at https://console.cloud.google.com/storage/browser/cloud-cromwell-dev/cromwell_execution/travis/centaur_workflow/${WORKFLOW_ID}/call-centaur/" exit "${EXIT_CODE}" diff --git a/src/bin/travis/testCentaurLocal.sh b/src/bin/travis/testCentaurLocal.sh index fe77a1347..337c302bf 100755 --- a/src/bin/travis/testCentaurLocal.sh +++ b/src/bin/travis/testCentaurLocal.sh @@ -34,4 +34,6 @@ CROMWELL_JAR=$(find "$(pwd)/target/scala-2.11" -name "cromwell-*.jar") LOCAL_CONF="$(pwd)/src/bin/travis/resources/local_centaur.conf" git clone https://github.com/broadinstitute/centaur.git cd centaur -./test_cromwell.sh -j"${CROMWELL_JAR}" -c${LOCAL_CONF} +git checkout ${CENTAUR_BRANCH} +cd .. 
+centaur/test_cromwell.sh -j"${CROMWELL_JAR}" -c${LOCAL_CONF} diff --git a/src/bin/travis/testCheckPublish.sh b/src/bin/travis/testCheckPublish.sh new file mode 100755 index 000000000..541b55385 --- /dev/null +++ b/src/bin/travis/testCheckPublish.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +set -e +set -x + +sbt clean assembly doc diff --git a/src/main/scala/cromwell/CromwellCommandLine.scala b/src/main/scala/cromwell/CromwellCommandLine.scala index c52ebbd66..230045559 100644 --- a/src/main/scala/cromwell/CromwellCommandLine.scala +++ b/src/main/scala/cromwell/CromwellCommandLine.scala @@ -6,7 +6,7 @@ import better.files._ import cats.data.Validated._ import cats.syntax.cartesian._ import cats.syntax.validated._ -import cromwell.core.WorkflowSourceFiles +import cromwell.core.{WorkflowSourceFilesWithoutImports, WorkflowSourceFilesCollection, WorkflowSourceFilesWithDependenciesZip} import cromwell.util.FileUtil._ import lenthall.exception.MessageAggregation import cromwell.core.ErrorOr._ @@ -17,7 +17,7 @@ sealed abstract class CromwellCommandLine case object UsageAndExit extends CromwellCommandLine case object RunServer extends CromwellCommandLine final case class RunSingle(wdlPath: Path, - sourceFiles: WorkflowSourceFiles, + sourceFiles: WorkflowSourceFilesCollection, inputsPath: Option[Path], optionsPath: Option[Path], metadataPath: Option[Path]) extends CromwellCommandLine @@ -26,7 +26,7 @@ object CromwellCommandLine { def apply(args: Seq[String]): CromwellCommandLine = { args.headOption match { case Some("server") if args.size == 1 => RunServer - case Some("run") if args.size >= 2 && args.size <= 5 => RunSingle(args.tail) + case Some("run") if args.size >= 2 && args.size <= 6 => RunSingle(args.tail) case _ => UsageAndExit } } @@ -38,15 +38,19 @@ object RunSingle { val inputsPath = argPath(args, 1, Option(".inputs"), checkDefaultExists = false) val optionsPath = argPath(args, 2, Option(".options"), checkDefaultExists = true) val metadataPath = argPath(args, 3, 
None) + val importPath = argPath(args, 4, None) val wdl = readContent("WDL file", wdlPath) val inputsJson = readJson("Inputs", inputsPath) val optionsJson = readJson("Workflow Options", optionsPath) - val sourceFiles = (wdl |@| inputsJson |@| optionsJson) map { WorkflowSourceFiles.apply } + val sourceFileCollection = importPath match { + case Some(p) => (wdl |@| inputsJson |@| optionsJson) map { (w, i, o) => WorkflowSourceFilesWithDependenciesZip.apply(w, i, o, Files.readAllBytes(p)) } + case None => (wdl |@| inputsJson |@| optionsJson) map WorkflowSourceFilesWithoutImports.apply + } val runSingle = for { - sources <- sourceFiles + sources <- sourceFileCollection _ <- writeableMetadataPath(metadataPath) } yield RunSingle(wdlPath, sources, inputsPath, optionsPath, metadataPath) diff --git a/src/main/scala/cromwell/Main.scala b/src/main/scala/cromwell/Main.scala index 74a5405c1..c52875572 100644 --- a/src/main/scala/cromwell/Main.scala +++ b/src/main/scala/cromwell/Main.scala @@ -74,11 +74,9 @@ object Main extends App { import PromiseActor.EnhancedActorRef - val promise = runner.askNoTimeout(RunWorkflow) - waitAndExit(promise, CromwellSystem) + waitAndExit(runner.askNoTimeout(RunWorkflow), CromwellSystem) } - private def waitAndExit(futureResult: Future[Any], workflowManagerSystem: CromwellSystem): Unit = { Await.ready(futureResult, Duration.Inf) @@ -100,17 +98,21 @@ object Main extends App { |java -jar cromwell.jar | |Actions: - |run [ [ - | []]] + |run [] [] + | [] [] | | Given a WDL file and JSON file containing the value of the | workflow inputs, this will run the workflow locally and | print out the outputs in JSON format. The workflow | options file specifies some runtime configuration for the | workflow (see README for details). The workflow metadata - | output is an optional file path to output the metadata. + | output is an optional file path to output the metadata. The + | directory of WDL files is optional. 
However, it is required + | if the primary workflow imports workflows that are outside + | of the root directory of the Cromwell project. + | | Use a single dash ("-") to skip optional files. Ex: - | run noinputs.wdl - - metadata.json + | run noinputs.wdl - - metadata.json - | |server | diff --git a/src/test/scala/cromwell/CromwellCommandLineSpec.scala b/src/test/scala/cromwell/CromwellCommandLineSpec.scala index 42f03fdfc..66c8ee29c 100644 --- a/src/test/scala/cromwell/CromwellCommandLineSpec.scala +++ b/src/test/scala/cromwell/CromwellCommandLineSpec.scala @@ -1,9 +1,9 @@ package cromwell import better.files._ -import cromwell.core.PathFactory._ +import cromwell.core.path.PathImplicits._ import cromwell.util.SampleWdl -import cromwell.util.SampleWdl.ThreeStep +import cromwell.util.SampleWdl.{FileClobber, FilePassingWorkflow, ThreeStep} import org.scalatest.{FlatSpec, Matchers} import scala.util.Try @@ -30,7 +30,7 @@ class CromwellCommandLineSpec extends FlatSpec with Matchers { } it should "fail with too many arguments to run" in { - CromwellCommandLine(List("run", "bork", "bork", "bork", "bork", "bork")) + CromwellCommandLine(List("run", "bork", "bork", "bork", "bork", "bork", "blerg")) } it should "RunSingle when supplying wdl and inputs" in { @@ -76,6 +76,23 @@ class CromwellCommandLineSpec extends FlatSpec with Matchers { ccl.isFailure shouldBe true ccl.failed.get.getMessage should include("Unable to write to metadata directory:") } + + it should "run if imports directory is a .zip file" in { + val wdlDir = File.newTemporaryDirectory("wdlDirectory") + + val filePassing = File.newTemporaryFile("filePassing", ".wdl", Option(wdlDir)) + val fileClobber = File.newTemporaryFile("fileClobber", ".wdl", Option(wdlDir)) + filePassing write FilePassingWorkflow.wdlSource() + fileClobber write FileClobber.wdlSource() + + val zippedDir = wdlDir.zip() + val zippedPath = zippedDir.pathAsString + + val ccl = Try(CromwellCommandLine(List("run", filePassing.pathAsString, "-", 
"-", "-", zippedPath))) + ccl.isFailure shouldBe false + + zippedDir.delete(swallowIOExceptions = true) + } } object CromwellCommandLineSpec { diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala index 75f2f779a..3c8e48fd4 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorBackendFactory.scala @@ -5,10 +5,10 @@ import com.typesafe.config.Config import com.typesafe.scalalogging.StrictLogging import cromwell.backend._ import cromwell.backend.impl.htcondor.caching.CacheActorFactory -import cromwell.backend.io.JobPaths +import cromwell.backend.io.JobPathsWithDocker import cromwell.backend.sfs.SharedFileSystemExpressionFunctions import cromwell.core.{CallContext, WorkflowOptions} -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.expression.WdlStandardLibraryFunctions import scala.util.{Failure, Success, Try} @@ -17,7 +17,7 @@ case class HtCondorBackendFactory(name: String, configurationDescriptor: Backend extends BackendLifecycleActorFactory with StrictLogging { override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = { Option(HtCondorInitializationActor.props(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) } @@ -32,14 +32,14 @@ case class HtCondorBackendFactory(name: String, configurationDescriptor: Backend override def expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, jobKey: BackendJobDescriptorKey, initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = { - val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, 
jobKey) + val jobPaths = new JobPathsWithDocker(jobKey, workflowDescriptor, configurationDescriptor.backendConfig) val callContext = CallContext( jobPaths.callExecutionRoot, jobPaths.stdout.toAbsolutePath.toString, jobPaths.stderr.toAbsolutePath.toString ) - new SharedFileSystemExpressionFunctions(HtCondorJobExecutionActor.fileSystems, callContext) + new SharedFileSystemExpressionFunctions(HtCondorJobExecutionActor.pathBuilders, callContext) } private def resolveCacheProviderProps(workflowOptions: WorkflowOptions) = { diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActor.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActor.scala index b0fdc75b2..1212876b6 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActor.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActor.scala @@ -7,9 +7,9 @@ import cromwell.backend.validation.RuntimeAttributesDefault import cromwell.backend.validation.RuntimeAttributesKeys._ import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor, BackendWorkflowInitializationActor} import cromwell.core.WorkflowOptions +import wdl4s.TaskCall import wdl4s.types.{WdlBooleanType, WdlIntegerType, WdlStringType} import wdl4s.values.WdlValue -import wdl4s.Call import scala.concurrent.Future import scala.util.Try @@ -19,14 +19,14 @@ object HtCondorInitializationActor { ContinueOnReturnCodeKey, CpuKey, MemoryKey, DiskKey) def props(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], configurationDescriptor: BackendConfigurationDescriptor, serviceRegistryActor: ActorRef): Props = Props(new HtCondorInitializationActor(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) } class HtCondorInitializationActor(override val 
workflowDescriptor: BackendWorkflowDescriptor, - override val calls: Seq[Call], + override val calls: Set[TaskCall], override val configurationDescriptor: BackendConfigurationDescriptor, override val serviceRegistryActor: ActorRef) extends BackendWorkflowInitializationActor { diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActor.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActor.scala index 400c6a55f..d005016ca 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActor.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActor.scala @@ -1,20 +1,22 @@ package cromwell.backend.impl.htcondor -import java.nio.file.FileSystems import java.nio.file.attribute.PosixFilePermission import java.util.UUID import akka.actor.{ActorRef, Props} -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend._ import cromwell.backend.impl.htcondor.caching.CacheActor._ import cromwell.backend.impl.htcondor.caching.localization.CachedResultLocalization -import cromwell.backend.io.JobPaths +import cromwell.backend.io.JobPathsWithDocker import cromwell.backend.sfs.{SharedFileSystem, SharedFileSystemExpressionFunctions} +import cromwell.backend.wdl.Command +import cromwell.core.path.JavaWriterImplicits._ +import cromwell.core.path.{DefaultPathBuilder, PathBuilder} import cromwell.services.keyvalue.KeyValueServiceActor._ import cromwell.services.metadata.CallMetadataKeys import org.apache.commons.codec.digest.DigestUtils -import wdl4s._ +import wdl4s.EvaluatedTaskInputs import wdl4s.parser.MemoryUnit import wdl4s.types.{WdlArrayType, WdlFileType} import 
wdl4s.util.TryUtil @@ -27,7 +29,7 @@ import scala.util.{Failure, Success, Try} object HtCondorJobExecutionActor { val HtCondorJobIdKey = "htCondor_job_id" - val fileSystems = List(FileSystems.getDefault) + val pathBuilders = List(DefaultPathBuilder) def props(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor, serviceRegistryActor: ActorRef, cacheActorProps: Option[Props]): Props = Props(new HtCondorJobExecutionActor(jobDescriptor, configurationDescriptor, serviceRegistryActor, cacheActorProps)) @@ -41,9 +43,9 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor import HtCondorJobExecutionActor._ import better.files._ - import cromwell.core.PathFactory._ private val tag = s"CondorJobExecutionActor-${jobDescriptor.call.fullyQualifiedName}:" + override val pathBuilders: List[PathBuilder] = HtCondorJobExecutionActor.pathBuilders implicit val executionContext = context.dispatcher @@ -53,7 +55,7 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor private val fileSystemsConfig = configurationDescriptor.backendConfig.getConfig("filesystems") override val sharedFileSystemConfig = fileSystemsConfig.getConfig("local") private val workflowDescriptor = jobDescriptor.workflowDescriptor - private val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobDescriptor.key) + private val jobPaths = new JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, configurationDescriptor.backendConfig) // Files private val executionDir = jobPaths.callExecutionRoot @@ -72,9 +74,9 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor private lazy val stderrWriter = extProcess.tailedWriter(100, submitFileStderr) private val call = jobDescriptor.key.call - private val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, fileSystems) + private val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, 
pathBuilders) - private val lookup = jobDescriptor.inputs.apply _ + private val lookup = jobDescriptor.fullyQualifiedInputs.apply _ private val runtimeAttributes = { val evaluateAttrs = call.task.runtimeAttributes.attrs mapValues (_.evaluate(lookup, callEngineFunction)) @@ -202,18 +204,18 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor condorJobId = Option(overallJobIdentifier) self ! TrackTaskStatus(overallJobIdentifier) - case _ => self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, + case _ => self ! JobExecutionResponse(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException("Failed to retrieve job(id) and cluster id"), Option(condorReturnCode))) } case 0 => log.error(s"Unexpected! Received return code for condor submission as 0, although stderr file is non-empty: {}", File(submitFileStderr).lines) - self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, + self ! JobExecutionResponse(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException(s"Execution process failed. HtCondor returned zero status code but non empty stderr file: $condorReturnCode"), Option(condorReturnCode))) case nonZeroExitCode: Int => - self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, + self ! JobExecutionResponse(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException(s"Execution process failed. HtCondor returned non zero status code: $condorReturnCode"), Option(condorReturnCode))) } } @@ -229,16 +231,16 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor context.system.scheduler.scheduleOnce(pollingInterval.seconds, self, TrackTaskStatus(jobIdentifier)) () case Success(Some(rc)) if runtimeAttributes.continueOnReturnCode.continueFor(rc) => self ! JobExecutionResponse(processSuccess(rc)) - case Success(Some(rc)) => self ! 
JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, + case Success(Some(rc)) => self ! JobExecutionResponse(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException("Job exited with invalid return code: " + rc), Option(rc))) - case Failure(error) => self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, error, None)) + case Failure(error) => self ! JobExecutionResponse(JobFailedNonRetryableResponse(jobDescriptor.key, error, None)) } } private def processSuccess(rc: Int): BackendJobExecutionResponse = { evaluateOutputs(callEngineFunction, outputMapper(jobPaths)) match { case Success(outputs) => - val succeededResponse = SucceededResponse(jobDescriptor.key, Some(rc), outputs, None, Seq.empty) + val succeededResponse = JobSucceededResponse(jobDescriptor.key, Some(rc), outputs, None, Seq.empty) log.debug("{} Storing data into cache for hash {}.", tag, jobHash) // If cache fails to store data for any reason it should not stop the workflow/task execution but log the issue. cacheActor foreach { _ ! StoreExecutionResult(jobHash, succeededResponse) } @@ -247,12 +249,12 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor val message = Option(e.getMessage) map { ": " + _ } getOrElse "" - FailedNonRetryableResponse(jobDescriptor.key, new Throwable("Failed post processing of outputs" + message, e), Option(rc)) + JobFailedNonRetryableResponse(jobDescriptor.key, new Throwable("Failed post processing of outputs" + message, e), Option(rc)) } } private def calculateHash: String = { - val cmd = call.task.instantiateCommand(jobDescriptor.inputs, callEngineFunction, identity) match { + val cmd = Command.instantiate(jobDescriptor, callEngineFunction) match { case Success(command) => command case Failure(ex) => val errMsg = s"$tag Cannot instantiate job command for caching purposes due to ${ex.getMessage}." 
@@ -276,7 +278,7 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor executionDir.toString.toFile.createIfNotExists(asDirectory = true, createParents = true) log.debug("{} Resolving job command", tag) - val command = localizeInputs(jobPaths.callInputsRoot, runtimeAttributes.dockerImage.isDefined, fileSystems, jobDescriptor.inputs) flatMap { + val command = localizeInputs(jobPaths.callInputsRoot, runtimeAttributes.dockerImage.isDefined)(jobDescriptor.inputDeclarations) flatMap { localizedInputs => resolveJobCommand(localizedInputs) } @@ -296,7 +298,7 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor HtCondorRuntimeKeys.Disk -> runtimeAttributes.disk.to(MemoryUnit.KB).amount.toLong ) - cmds.generateSubmitFile(submitFilePath, attributes) // This writes the condor submit file + cmds.generateSubmitFile(submitFilePath, attributes, runtimeAttributes.nativeSpecs) // This writes the condor submit file () } catch { @@ -306,7 +308,7 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor } } - private def resolveJobCommand(localizedInputs: CallInputs): Try[String] = { + private def resolveJobCommand(localizedInputs: EvaluatedTaskInputs): Try[String] = { val command = if (runtimeAttributes.dockerImage.isDefined) { modifyCommandForDocker(call.task.instantiateCommand(localizedInputs, callEngineFunction, identity), localizedInputs) } else { @@ -329,7 +331,7 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor serviceRegistryActor.putMetadata(jobDescriptor.workflowDescriptor.id, Option(jobDescriptor.key), metadataKeyValues) } - private def modifyCommandForDocker(jobCmd: Try[String], localizedInputs: CallInputs): Try[String] = { + private def modifyCommandForDocker(jobCmd: Try[String], localizedInputs: EvaluatedTaskInputs): Try[String] = { Try { val dockerInputDataVol = localizedInputs.values.collect { case file if file.wdlType == WdlFileType => @@ -362,16 
+364,16 @@ class HtCondorJobExecutionActor(override val jobDescriptor: BackendJobDescriptor createExecutionFolderAndScript() executeTask() } catch { - case e: Exception => self ! JobExecutionResponse(FailedNonRetryableResponse(jobDescriptor.key, e, None)) + case e: Exception => self ! JobExecutionResponse(JobFailedNonRetryableResponse(jobDescriptor.key, e, None)) } } - private def localizeCachedResponse(succeededResponse: SucceededResponse): BackendJobExecutionResponse = { + private def localizeCachedResponse(succeededResponse: JobSucceededResponse): BackendJobExecutionResponse = { Try(localizeCachedOutputs(executionDir, succeededResponse.jobOutputs)) match { case Success(outputs) => executionDir.toString.toFile.createIfNotExists(asDirectory = true, createParents = true) - SucceededResponse(jobDescriptor.key, succeededResponse.returnCode, outputs, None, Seq.empty) - case Failure(exception) => FailedNonRetryableResponse(jobDescriptor.key, exception, None) + JobSucceededResponse(jobDescriptor.key, succeededResponse.returnCode, outputs, None, Seq.empty) + case Failure(exception) => JobFailedNonRetryableResponse(jobDescriptor.key, exception, None) } } } diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributes.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributes.scala index f8dd9a595..758d2dbb4 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributes.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributes.scala @@ -12,8 +12,8 @@ import cromwell.backend.validation.RuntimeAttributesValidation._ import cromwell.core._ import cromwell.core.ErrorOr._ import lenthall.exception.MessageAggregation -import wdl4s.types.{WdlIntegerType, WdlStringType, WdlBooleanType, WdlType} -import wdl4s.values.{WdlString, WdlBoolean, WdlInteger, WdlValue} +import wdl4s.types._ 
+import wdl4s.values.{WdlArray, WdlBoolean, WdlInteger, WdlString, WdlValue} object HtCondorRuntimeAttributes { @@ -26,6 +26,7 @@ object HtCondorRuntimeAttributes { val DockerWorkingDirKey = "dockerWorkingDir" val DockerOutputDirKey = "dockerOutputDir" val DiskKey = "disk" + val NativeSpecsKey = "nativeSpecs" val staticDefaults = Map( FailOnStderrKey -> WdlBoolean(FailOnStderrDefaultValue), @@ -43,7 +44,8 @@ object HtCondorRuntimeAttributes { DockerOutputDirKey -> Set(WdlStringType), CpuKey -> Set(WdlIntegerType), MemoryKey -> Set(WdlStringType), - DiskKey -> Set(WdlStringType) + DiskKey -> Set(WdlStringType), + NativeSpecsKey -> Set(WdlArrayType(WdlStringType)) ) def apply(attrs: Map[String, WdlValue], options: WorkflowOptions): HtCondorRuntimeAttributes = { @@ -59,9 +61,10 @@ object HtCondorRuntimeAttributes { val cpu = validateCpu(withDefaultValues.get(CpuKey), noValueFoundFor(CpuKey)) val memory = validateMemory(withDefaultValues.get(MemoryKey), noValueFoundFor(MemoryKey)) val disk = validateDisk(withDefaultValues.get(DiskKey), noValueFoundFor(DiskKey)) + val nativeSpecs = validateNativeSpecs(withDefaultValues.get(NativeSpecsKey), None.validNel) - (continueOnReturnCode |@| docker |@| dockerWorkingDir |@| dockerOutputDir |@| failOnStderr |@| cpu |@| memory |@| disk) map { - new HtCondorRuntimeAttributes(_, _, _, _, _, _, _, _) + (continueOnReturnCode |@| docker |@| dockerWorkingDir |@| dockerOutputDir |@| failOnStderr |@| cpu |@| memory |@| disk |@| nativeSpecs) map { + new HtCondorRuntimeAttributes(_, _, _, _, _, _, _, _, _) } match { case Valid(x) => x case Invalid(nel) => throw new RuntimeException with MessageAggregation { @@ -97,6 +100,17 @@ object HtCondorRuntimeAttributes { case None => onMissingKey } } + + private def validateNativeSpecs(value: Option[WdlValue], onMissingKey: => ErrorOr[Option[Array[String]]]): ErrorOr[Option[Array[String]]] = { + val nativeSpecsWrongFormatMsg = s"Expecting $NativeSpecsKey runtime attribute to be an Array of Strings. 
Exception: %s" + value match { + case Some(ns: WdlArray) if ns.wdlType.memberType.equals(WdlStringType) => + val nsa = ns.value.map { value => value.valueString }.toArray + Option(nsa).validNel + case Some(_) => String.format(nativeSpecsWrongFormatMsg, "Not supported WDL type value").invalidNel + case None => onMissingKey + } + } } case class HtCondorRuntimeAttributes(continueOnReturnCode: ContinueOnReturnCode, @@ -106,4 +120,5 @@ case class HtCondorRuntimeAttributes(continueOnReturnCode: ContinueOnReturnCode, failOnStderr: Boolean, cpu: Int, memory: MemorySize, - disk: MemorySize) + disk: MemorySize, + nativeSpecs: Option[Array[String]]) diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorWrapper.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorWrapper.scala index 76a81c08a..35af6dd1d 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorWrapper.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/HtCondorWrapper.scala @@ -5,8 +5,9 @@ import java.nio.file.{Files, Path} import better.files._ import com.typesafe.scalalogging.StrictLogging import cromwell.backend.impl.htcondor -import cromwell.core.PathFactory.{EnhancedPath, FlushingAndClosingWriter} -import cromwell.core.{TailedWriter, UntailedWriter} +import cromwell.core.path.{TailedWriter, UntailedWriter} +import cromwell.core.path.PathImplicits._ +import cromwell.core.path.JavaWriterImplicits._ import scala.sys.process._ @@ -55,22 +56,28 @@ class HtCondorCommands extends StrictLogging { */ def writeScript(instantiatedCommand: String, filePath: Path, containerRoot: Path): Unit = { logger.debug(s"Writing bash script for execution. Command: $instantiatedCommand.") - File(filePath).write( - s"""#!/bin/sh - |cd $containerRoot - |$instantiatedCommand - |echo $$? 
> rc - |""".stripMargin) + val scriptBody = s""" + +#!/bin/sh +cd $containerRoot +$instantiatedCommand +echo $$? > rc + +""".trim + "\n" + File(filePath).write(scriptBody) () } - def generateSubmitFile(path: Path, attributes: Map[String, Any]): String = { + def generateSubmitFile(path: Path, attributes: Map[String, Any], nativeSpecs: Option[Array[String]]): String = { def htCondorSubmitCommand(filePath: Path) = { s"${HtCondorCommands.Submit} ${filePath.toString}" } val submitFileWriter = path.untailed - attributes.foreach(attribute => submitFileWriter.writeWithNewline(s"${attribute._1}=${attribute._2}")) + attributes.foreach { attribute => submitFileWriter.writeWithNewline(s"${attribute._1}=${attribute._2}") } + //Native specs is intended for attaching HtCondor native configuration such as 'requirements' and 'rank' definition + //directly to the submit file. + nativeSpecs foreach { _.foreach { submitFileWriter.writeWithNewline } } submitFileWriter.writeWithNewline(HtCondorRuntimeKeys.Queue) submitFileWriter.writer.flushAndClose() logger.debug(s"submit file name is : $path") diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActor.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActor.scala index 70799bd67..74fb3ade5 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActor.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/CacheActor.scala @@ -1,7 +1,7 @@ package cromwell.backend.impl.htcondor.caching import akka.actor.{Actor, ActorLogging} -import cromwell.backend.BackendJobExecutionActor.SucceededResponse +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse import cromwell.backend.impl.htcondor.caching.CacheActor._ import cromwell.backend.impl.htcondor.caching.exception.{CachedResultAlreadyExistException, CachedResultNotFoundException} import 
cromwell.backend.impl.htcondor.caching.model.CachedExecutionResult @@ -10,10 +10,10 @@ object CacheActor { trait CacheActorCommand case class ReadExecutionResult(hash: String) extends CacheActorCommand - case class StoreExecutionResult(hash: String, succeededResponse: SucceededResponse) extends CacheActorCommand + case class StoreExecutionResult(hash: String, succeededResponse: JobSucceededResponse) extends CacheActorCommand trait CacheActorResponse - case class ExecutionResultFound(succeededResponse: SucceededResponse) extends CacheActorResponse + case class ExecutionResultFound(succeededResponse: JobSucceededResponse) extends CacheActorResponse case object ExecutionResultNotFound extends CacheActorResponse case class ExecutionResultStored(hash: String) extends CacheActorResponse case object ExecutionResultAlreadyExist extends CacheActorResponse diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalization.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalization.scala index 2f8254e5c..127141c5b 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalization.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalization.scala @@ -23,7 +23,7 @@ trait CachedResultLocalization { WdlSingleFile(slPath.toString) } - def localizeCachedOutputs(executionPath: Path, outputs: JobOutputs): JobOutputs = { + def localizeCachedOutputs(executionPath: Path, outputs: CallOutputs): CallOutputs = { outputs map { case (lqn, jobOutput) => jobOutput.wdlValue.wdlType match { case WdlFileType => (lqn -> JobOutput(localizeCachedFile(executionPath, jobOutput.wdlValue))) diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/model/CachedExecutionResult.scala 
b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/model/CachedExecutionResult.scala index fdff70804..1b023fd41 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/model/CachedExecutionResult.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/model/CachedExecutionResult.scala @@ -1,6 +1,6 @@ package cromwell.backend.impl.htcondor.caching.model -import cromwell.backend.BackendJobExecutionActor.SucceededResponse +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse -case class CachedExecutionResult(hash: String, succeededResponse: SucceededResponse) +case class CachedExecutionResult(hash: String, succeededResponse: JobSucceededResponse) diff --git a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActor.scala b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActor.scala index 7f99e1565..cf6cf1151 100644 --- a/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActor.scala +++ b/supportedBackends/htcondor/src/main/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActor.scala @@ -4,7 +4,7 @@ import com.mongodb.DBObject import com.mongodb.casbah.MongoCollection import com.mongodb.casbah.commons.{MongoDBObject, TypeImports} import com.mongodb.util.JSON -import cromwell.backend.BackendJobExecutionActor.SucceededResponse +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse import cromwell.backend.impl.htcondor.caching.CacheActor import cromwell.backend.impl.htcondor.caching.exception.{CachedResultAlreadyExistException, CachedResultNotFoundException} import cromwell.backend.impl.htcondor.caching.model.CachedExecutionResult @@ -55,7 +55,7 @@ class MongoCacheActor(collection: MongoCollection, private def 
deserializeSucceededResponse(mongoDbObject: TypeImports.DBObject): CachedExecutionResult = { val cachedResult = JsonParser(mongoDbObject.toString).convertTo[MongoCachedExecutionResult] - val succeededResponse = deserialize(cachedResult.succeededResponse.byteArray, classOf[SucceededResponse]) + val succeededResponse = deserialize(cachedResult.succeededResponse.byteArray, classOf[JobSucceededResponse]) CachedExecutionResult(cachedResult.hash, succeededResponse) } diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorCommandSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorCommandSpec.scala index ce1097f4e..9a03fecca 100644 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorCommandSpec.scala +++ b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorCommandSpec.scala @@ -5,17 +5,17 @@ import better.files._ import org.scalatest.{Matchers, WordSpecLike} class HtCondorCommandSpec extends WordSpecLike with Matchers { - val attributes = Map("executable" -> "test.sh", "input" -> "/temp/test", "error"->"stderr") - val resultAttributes = List("executable=test.sh","input=/temp/test","error=stderr", "queue") - val htCondorCommands = new HtCondorCommands + private val attributes = Map("executable" -> "test.sh", "input" -> "/temp/test", "error"->"stderr") + private val resultAttributes = List("executable=test.sh","input=/temp/test","error=stderr", "spec1", "spec2", "queue") + private val htCondorCommands = new HtCondorCommands + private val nativeSpecs = Option(Array("spec1", "spec2")) "submitCommand method" should { "return submit file with content passed to it" in { - val dir = File.newTemporaryFile() - val command = htCondorCommands.generateSubmitFile(dir.path,attributes) - val file = dir - resultAttributes shouldEqual dir.lines.toList - dir.delete() + val file = File.newTemporaryFile() + val command = 
htCondorCommands.generateSubmitFile(file.path, attributes, nativeSpecs) + resultAttributes shouldEqual file.lines.toList + file.delete() command shouldEqual s"condor_submit ${file.path}" } } diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActorSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActorSpec.scala index 9e71d1a14..29d19d117 100644 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActorSpec.scala +++ b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorInitializationActorSpec.scala @@ -5,7 +5,7 @@ import cromwell.backend.BackendWorkflowInitializationActor.Initialize import cromwell.backend.{BackendConfigurationDescriptor, BackendSpec, BackendWorkflowDescriptor} import cromwell.core.TestKitSuite import org.scalatest.{Matchers, WordSpecLike} -import wdl4s.Call +import wdl4s.TaskCall import scala.concurrent.duration._ @@ -29,12 +29,12 @@ class HtCondorInitializationActorSpec extends TestKitSuite("HtCondorInitializati | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin - private def getHtCondorBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], conf: BackendConfigurationDescriptor) = { + private def getHtCondorBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], conf: BackendConfigurationDescriptor) = { system.actorOf(HtCondorInitializationActor.props(workflowDescriptor, calls, conf, emptyActor)) } @@ -43,7 +43,7 @@ class HtCondorInitializationActorSpec extends TestKitSuite("HtCondorInitializati within(Timeout) { EventFilter.warning(message = s"Key/s [proc] is/are not supported by HtCondorBackend. 
Unsupported attributes will not be part of jobs executions.", occurrences = 1) intercept { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { proc: 1 }""") - val backend = getHtCondorBackend(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, + val backend = getHtCondorBackend(workflowDescriptor, workflowDescriptor.workflow.taskCalls, emptyBackendConfig) backend ! Initialize } diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActorSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActorSpec.scala index 589554e94..33bc5c71c 100644 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActorSpec.scala +++ b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorJobExecutionActorSpec.scala @@ -7,15 +7,15 @@ import akka.actor.{Actor, Props} import akka.testkit.{ImplicitSender, TestActorRef} import better.files._ import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend.impl.htcondor.caching.CacheActor import cromwell.backend.impl.htcondor.caching.exception.CachedResultNotFoundException import cromwell.backend.impl.htcondor.caching.model.CachedExecutionResult -import cromwell.backend.io.JobPaths +import cromwell.backend.io.JobPathsWithDocker import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendSpec} import cromwell.core._ +import cromwell.core.path.{PathWriter, TailedWriter, UntailedWriter} import cromwell.services.keyvalue.KeyValueServiceActor.{KvGet, KvPair, KvPut} -import org.mockito.Matchers._ import org.mockito.Mockito import org.mockito.Mockito._ import 
org.scalatest.concurrent.PatienceConfiguration.Timeout @@ -54,7 +54,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -73,7 +73,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -92,7 +92,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -102,7 +102,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc | root = "local-cromwell-executions" | | docker { - | cmd = "docker run -w %s %s %s %s --rm %s %s" + | cmd = "docker run -w %s %s %s %s --rm %s /bin/bash -c \\"%s\\"" | defaultWorkingDir = "/workingDir/" | defaultOutputDir = "/output/" | } @@ -147,7 +147,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc }).underlyingActor whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(htCondorProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(htCondorProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(htCondorProcess, times(1)).untailedWriter(any[Path]) @@ -179,7 +179,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc }).underlyingActor whenReady(backend.recover, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] } cleanUpJob(jobPaths) @@ -207,7 +207,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc }).underlyingActor whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(htCondorProcess, 
times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(htCondorProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(htCondorProcess, times(1)).untailedWriter(any[Path]) @@ -237,8 +237,8 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(-1)) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains("Job exited with invalid return code")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains("Job exited with invalid return code")) } cleanUpJob(jobPaths) @@ -271,7 +271,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(911)) whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] } cleanUpJob(jobPaths) @@ -288,7 +288,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc """.stripMargin val jsonInputFile = createCannedFile("testFile", "some content").pathAsString val inputs = Map( - "inputFile" -> WdlFile(jsonInputFile) + "wf_hello.hello.inputFile" -> WdlFile(jsonInputFile) ) val jobDescriptor = prepareJob(helloWorldWdlWithFileInput, runtime, Option(inputs)) val (job, jobPaths, backendConfigDesc) = (jobDescriptor.jobDescriptor, jobDescriptor.jobPaths, jobDescriptor.backendConfigurationDescriptor) @@ -310,7 +310,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(0)) whenReady(backend.execute) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] 
} val bashScript = Source.fromFile(jobPaths.script.toFile).getLines.mkString @@ -318,7 +318,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc assert(bashScript.contains("docker run -w /workingDir/ -v")) assert(bashScript.contains(":/workingDir/")) assert(bashScript.contains(":ro")) - assert(bashScript.contains("/call-hello/execution:/outputDir/ --rm ubuntu/latest echo")) + assert(bashScript.contains("/call-hello/execution:/outputDir/ --rm ubuntu/latest /bin/bash -c \"echo")) cleanUpJob(jobPaths) } @@ -346,8 +346,8 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc when(htCondorProcess.jobReturnCode(any[String], any[Path])).thenReturn(Option(-1)) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains("Could not write the file.")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains("Could not write the file.")) } cleanUpJob(jobPaths) @@ -372,7 +372,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc createCannedFile(prefix = "testFile2", contents = "some other content", dir = Some(tempDir2)).pathAsString val inputs = Map( - "inputFiles" -> WdlArray(WdlArrayType(WdlFileType), Seq(WdlFile(jsonInputFile), WdlFile(jsonInputFile2))) + "wf_hello.hello.inputFiles" -> WdlArray(WdlArrayType(WdlFileType), Seq(WdlFile(jsonInputFile), WdlFile(jsonInputFile2))) ) val jobDescriptor = prepareJob(helloWorldWdlWithFileArrayInput, runtime, Option(inputs)) val (job, jobPaths, backendConfigDesc) = (jobDescriptor.jobDescriptor, jobDescriptor.jobPaths, jobDescriptor.backendConfigurationDescriptor) @@ -394,7 +394,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc when(htCondorProcess.jobReturnCode(any[String], 
any[Path])).thenReturn(Option(0)) whenReady(backend.execute) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] } val bashScript = Source.fromFile(jobPaths.script.toFile).getLines.mkString @@ -403,12 +403,12 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc assert(bashScript.contains(":/workingDir/")) assert(bashScript.contains(tempDir1.toAbsolutePath.toString)) assert(bashScript.contains(tempDir2.toAbsolutePath.toString)) - assert(bashScript.contains("/call-hello/execution:/outputDir/ --rm ubuntu/latest echo")) + assert(bashScript.contains("/call-hello/execution:/outputDir/ --rm ubuntu/latest /bin/bash -c \"echo")) cleanUpJob(jobPaths) } - private def cleanUpJob(jobPaths: JobPaths): Unit = { + private def cleanUpJob(jobPaths: JobPathsWithDocker): Unit = { File(jobPaths.workflowRoot).delete(true) () } @@ -425,7 +425,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc val backendWorkflowDescriptor = buildWorkflowDescriptor(wdl = source, inputs = inputFiles.getOrElse(Map.empty), runtime = runtimeString) val backendConfigurationDescriptor = BackendConfigurationDescriptor(backendConfig, ConfigFactory.load) val jobDesc = jobDescriptorFromSingleCallWorkflow(backendWorkflowDescriptor, inputFiles.getOrElse(Map.empty), emptyWorkflowOptions, Set.empty) - val jobPaths = new JobPaths(backendWorkflowDescriptor, backendConfig, jobDesc.key) + val jobPaths = new JobPathsWithDocker(jobDesc.key, backendWorkflowDescriptor, backendConfig) val executionDir = File(jobPaths.callExecutionRoot) val stdout = File(executionDir.pathAsString, "stdout") stdout.createIfNotExists(asDirectory = false, createParents = true) @@ -440,7 +440,7 @@ class HtCondorJobExecutionActorSpec extends TestKitSuite("HtCondorJobExecutionAc TestJobDescriptor(jobDesc, jobPaths, backendConfigurationDescriptor) } - private case class TestJobDescriptor(jobDescriptor: BackendJobDescriptor, jobPaths: 
JobPaths, backendConfigurationDescriptor: BackendConfigurationDescriptor) + private case class TestJobDescriptor(jobDescriptor: BackendJobDescriptor, jobPaths: JobPathsWithDocker, backendConfigurationDescriptor: BackendConfigurationDescriptor) trait MockWriter extends Writer { var closed = false diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributesSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributesSpec.scala index db95a999e..659d700fa 100644 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributesSpec.scala +++ b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/HtCondorRuntimeAttributesSpec.scala @@ -30,7 +30,7 @@ class HtCondorRuntimeAttributesSpec extends WordSpecLike with Matchers { | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -39,7 +39,8 @@ class HtCondorRuntimeAttributesSpec extends WordSpecLike with Matchers { val memorySize = MemorySize.parse("0.512 GB").get val diskSize = MemorySize.parse("1.024 GB").get - val staticDefaults = new HtCondorRuntimeAttributes(ContinueOnReturnCodeSet(Set(0)), None, None, None, false, 1, memorySize, diskSize) + val staticDefaults = new HtCondorRuntimeAttributes(ContinueOnReturnCodeSet(Set(0)), None, None, None, false, 1, + memorySize, diskSize, None) def workflowOptionsWithDefaultRA(defaults: Map[String, JsValue]) = { WorkflowOptions(JsObject(Map( @@ -219,11 +220,16 @@ class HtCondorRuntimeAttributesSpec extends WordSpecLike with Matchers { assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, shouldBeIgnored, expectedRuntimeAttributes) } - "throw an exception when tries to validate an invalid disk entry" in { + "throw an exception when tries to validate an invalid String disk entry" in { val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { docker: 
"ubuntu:latest" disk: "value" }""").head assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting memory runtime attribute to be an Integer or String with format '8 GB'") } + "throw an exception when tries to validate an invalid Integer array disk entry" in { + val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { docker: "ubuntu:latest" disk: [1] }""").head + assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting disk runtime attribute to be an Integer or String with format '8 GB'") + } + "use workflow options as default if disk key is missing" in { val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { }""").head val expectedRuntimeAttributes = staticDefaults.copy(disk = MemorySize.parse("65 GB").get) @@ -237,37 +243,59 @@ class HtCondorRuntimeAttributesSpec extends WordSpecLike with Matchers { val shouldBeIgnored = workflowOptionsWithDefaultRA(Map()) assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, shouldBeIgnored, expectedRuntimeAttributes) } + + "return an instance of itself when tries to validate a valid native specs entry" in { + val expectedRuntimeAttributes = staticDefaults.copy(nativeSpecs = Option(Array("spec1", "spec2"))) + val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { nativeSpecs: ["spec1", "spec2"] }""").head + assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes, emptyWorkflowOptions, expectedRuntimeAttributes) + } + + "throw an exception when tries to validate an invalid native specs entry" in { + val runtimeAttributes = createRuntimeAttributes(HelloWorld, """runtime { nativeSpecs: [1, 2] }""").head + assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes, "Expecting nativeSpecs runtime attribute to be an Array of Strings.") + } } - private def assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes: Map[String, WdlValue], workflowOptions: WorkflowOptions, expectedRuntimeAttributes: 
HtCondorRuntimeAttributes): Unit = { + private def assertHtCondorRuntimeAttributesSuccessfulCreation(runtimeAttributes: Map[String, WdlValue], + workflowOptions: WorkflowOptions, + expectedRuntimeAttributes: HtCondorRuntimeAttributes) = { try { - assert(HtCondorRuntimeAttributes(runtimeAttributes, workflowOptions) == expectedRuntimeAttributes) + val actualRuntimeAttr = HtCondorRuntimeAttributes(runtimeAttributes, workflowOptions) + assert(actualRuntimeAttr.cpu == expectedRuntimeAttributes.cpu) + assert(actualRuntimeAttr.disk == expectedRuntimeAttributes.disk) + assert(actualRuntimeAttr.memory == expectedRuntimeAttributes.memory) + assert(actualRuntimeAttr.continueOnReturnCode == expectedRuntimeAttributes.continueOnReturnCode) + assert(actualRuntimeAttr.failOnStderr == expectedRuntimeAttributes.failOnStderr) + assert(actualRuntimeAttr.dockerWorkingDir == expectedRuntimeAttributes.dockerWorkingDir) + assert(actualRuntimeAttr.dockerImage == expectedRuntimeAttributes.dockerImage) + assert(actualRuntimeAttr.dockerOutputDir == expectedRuntimeAttributes.dockerOutputDir) + expectedRuntimeAttributes.nativeSpecs match { + case Some(ns) => assert(ns.deep == expectedRuntimeAttributes.nativeSpecs.get.deep) + case None => assert(expectedRuntimeAttributes.nativeSpecs.isEmpty) + } } catch { case ex: RuntimeException => fail(s"Exception was not expected but received: ${ex.getMessage}") } - () } - private def assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes: Map[String, WdlValue], exMsg: String): Unit = { + private def assertHtCondorRuntimeAttributesFailedCreation(runtimeAttributes: Map[String, WdlValue], exMsg: String) = { try { HtCondorRuntimeAttributes(runtimeAttributes, emptyWorkflowOptions) fail("A RuntimeException was expected.") } catch { case ex: RuntimeException => assert(ex.getMessage.contains(exMsg)) } - () } private def createRuntimeAttributes(wdlSource: WdlSource, runtimeAttributes: String): Seq[Map[String, WdlValue]] = { val workflowDescriptor = 
buildWorkflowDescriptor(wdlSource, runtime = runtimeAttributes) def createLookup(call: Call): ScopedLookupFunction = { - val declarations = workflowDescriptor.workflowNamespace.workflow.declarations ++ call.task.declarations val knownInputs = workflowDescriptor.inputs - WdlExpression.standardLookupFunction(knownInputs, declarations, NoFunctions) + call.lookupFunction(knownInputs, NoFunctions) } - workflowDescriptor.workflowNamespace.workflow.calls map { + workflowDescriptor.workflow.taskCalls.toSeq map { call => val ra = call.task.runtimeAttributes.attrs mapValues { _.evaluate(createLookup(call), NoFunctions) } TryUtil.sequenceMap(ra, "Runtime attributes evaluation").get diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalizationSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalizationSpec.scala index c2c4d101f..e0e847e56 100644 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalizationSpec.scala +++ b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/localization/CachedResultLocalizationSpec.scala @@ -2,7 +2,7 @@ package cromwell.backend.impl.htcondor.caching.localization import java.nio.file.Files -import cromwell.core.{JobOutput, JobOutputs} +import cromwell.core.{JobOutput, CallOutputs} import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike} import wdl4s.types.{WdlArrayType, WdlFileType} import wdl4s.values.{WdlArray, WdlSingleFile, WdlString} @@ -33,7 +33,7 @@ class CachedResultLocalizationSpec extends WordSpecLike with Matchers with Befor } "localize cached job outputs which are WDL files using symbolic link" in { - val outputs: JobOutputs = Map("File1" -> JobOutput(WdlSingleFile(defaultCachedFile.toAbsolutePath.toString))) + val outputs: CallOutputs = Map("File1" -> 
JobOutput(WdlSingleFile(defaultCachedFile.toAbsolutePath.toString))) val newJobOutputs = cachedResults.localizeCachedOutputs(newTmpDir, outputs) newJobOutputs foreach { case (lqn, jobOutput) => assert(jobOutput.wdlValue.valueString == newTmpFile.toString) diff --git a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorSpec.scala b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorSpec.scala index fa675e758..d638b82d9 100644 --- a/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorSpec.scala +++ b/supportedBackends/htcondor/src/test/scala/cromwell/backend/impl/htcondor/caching/provider/mongodb/MongoCacheActorSpec.scala @@ -8,7 +8,7 @@ import com.mongodb.util.JSON import com.mongodb.{DBObject, WriteResult} import com.typesafe.config.{Config, ConfigFactory} import cromwell.backend.{MemorySize, BackendJobDescriptorKey} -import cromwell.backend.BackendJobExecutionActor.SucceededResponse +import cromwell.backend.BackendJobExecutionActor.JobSucceededResponse import cromwell.backend.impl.htcondor.HtCondorRuntimeAttributes import cromwell.backend.impl.htcondor.caching.CacheActor._ import cromwell.backend.impl.htcondor.caching.exception.CachedResultNotFoundException @@ -20,7 +20,7 @@ import org.mockito.Mockito import org.mockito.Mockito._ import org.scalatest.mockito.MockitoSugar import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, MustMatchers, WordSpecLike} -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.values.WdlString class MongoCacheActorSpec extends TestKit(ActorSystem("MongoCacheProviderActorSpecSystem")) with WordSpecLike with MustMatchers @@ -33,10 +33,10 @@ class MongoCacheActorSpec extends TestKit(ActorSystem("MongoCacheProviderActorSp val mongoDbCollectionMock = mock[MongoCollection] val memorySize = MemorySize.parse("0.512 GB").get val diskSize = 
MemorySize.parse("1.024 GB").get - val runtimeConfig = HtCondorRuntimeAttributes(ContinueOnReturnCodeSet(Set(0)), Some("tool-name"), Some("/workingDir"), Some("/outputDir"), true, 1, memorySize, diskSize) + val runtimeConfig = HtCondorRuntimeAttributes(ContinueOnReturnCodeSet(Set(0)), Some("tool-name"), Some("/workingDir"), Some("/outputDir"), true, 1, memorySize, diskSize, None) val jobHash = "88dde49db10f1551299fb9937f313c10" val taskStatus = "done" - val succeededResponseMock = SucceededResponse(BackendJobDescriptorKey(Call(None, "TestJob", null, null, null, None), None, 0), None, Map("test" -> JobOutput(WdlString("Test"))), None, Seq.empty) + val succeededResponseMock = JobSucceededResponse(BackendJobDescriptorKey(TaskCall(Option("taskName"), null, null, null), None, 0), None, Map("test" -> JobOutput(WdlString("Test"))), None, Seq.empty) val serSucceededRespMock = KryoSerializedObject(serialize(succeededResponseMock)) val cachedExecutionResult = MongoCachedExecutionResult(jobHash, serSucceededRespMock) val cachedExecutionResultDbObject = JSON.parse(cachedExecutionResult.toJson.toString).asInstanceOf[DBObject] diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala index 7285171b5..574e58153 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/GenomicsFactory.scala @@ -2,22 +2,22 @@ package cromwell.backend.impl.jes import java.net.URL -import com.google.api.client.auth.oauth2.Credential -import com.google.api.client.http.HttpTransport -import com.google.api.client.json.JsonFactory import com.google.api.services.genomics.Genomics +import cromwell.core.WorkflowOptions +import cromwell.filesystems.gcs.auth.GoogleAuthMode -object GenomicsFactory { +case class GenomicsFactory(applicationName: String, authMode: GoogleAuthMode, 
endpointUrl: URL) { - def apply(applicationName: String, credential: Credential, endpointUrl: URL): Genomics = { - GoogleGenomics.from(applicationName, endpointUrl, credential, credential.getJsonFactory, credential.getTransport) - } + def withOptions(options: WorkflowOptions) = { + val credential = authMode.credential(options) - // Wrapper object around Google's Genomics class providing a convenience 'from' "method" - object GoogleGenomics { - def from(applicationName: String, endpointUrl: URL, credential: Credential, jsonFactory: JsonFactory, httpTransport: HttpTransport): Genomics = { - new Genomics.Builder(httpTransport, jsonFactory, credential).setApplicationName(applicationName).setRootUrl(endpointUrl.toString).build - } + new Genomics.Builder( + credential.getTransport, + credential.getJsonFactory, + credential) + .setApplicationName(applicationName) + .setRootUrl(endpointUrl.toString) + .build } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala index a08be8df9..84da1efbc 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActor.scala @@ -9,29 +9,26 @@ import better.files._ import cats.instances.future._ import cats.syntax.functor._ import com.google.api.client.googleapis.json.GoogleJsonResponseException +import com.google.cloud.storage.contrib.nio.CloudStoragePath import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse} import cromwell.backend.BackendLifecycleActor.AbortJobCommand import cromwell.backend.async.AsyncBackendJobExecutionActor.{ExecutionMode, JobId} import cromwell.backend.async.{AbortedExecutionHandle, AsyncBackendJobExecutionActor, ExecutionHandle, 
FailedNonRetryableExecutionHandle, FailedRetryableExecutionHandle, NonRetryableExecution, SuccessfulExecutionHandle} -import cromwell.backend.impl.jes.JesImplicits.PathString import cromwell.backend.impl.jes.JesJobExecutionActor.JesOperationIdKey import cromwell.backend.impl.jes.RunStatus.TerminalRunStatus import cromwell.backend.impl.jes.io._ import cromwell.backend.impl.jes.statuspolling.JesPollingActorClient -import cromwell.backend.{AttemptedLookupResult, BackendJobDescriptor, BackendWorkflowDescriptor, PreemptedException} +import cromwell.backend.wdl.OutputEvaluator +import cromwell.backend.{BackendJobDescriptor, BackendWorkflowDescriptor, PreemptedException} import cromwell.core.Dispatcher.BackendDispatcher import cromwell.core._ import cromwell.core.logging.JobLogging +import cromwell.core.path.proxy.PathProxy import cromwell.core.retry.{Retry, SimpleExponentialBackoff} -import cromwell.filesystems.gcs.NioGcsPath import cromwell.services.keyvalue.KeyValueServiceActor._ import cromwell.services.metadata._ -import wdl4s.AstTools._ -import wdl4s.WdlExpression.ScopedLookupFunction import wdl4s._ -import wdl4s.command.ParameterCommandPart import wdl4s.expression.NoFunctions -import wdl4s.util.TryUtil import wdl4s.values._ import scala.concurrent.duration._ @@ -58,6 +55,7 @@ object JesAsyncBackendJobExecutionActor { object WorkflowOptionKeys { val MonitoringScript = "monitoring_script" val GoogleProject = "google_project" + val GoogleComputeServiceAccount = "google_compute_service_account" } @@ -68,7 +66,7 @@ object JesAsyncBackendJobExecutionActor { * ask them for results. 
*/ case class JesPendingExecutionHandle(jobDescriptor: BackendJobDescriptor, - jesOutputs: Seq[JesFileOutput], + jesOutputs: Set[JesFileOutput], run: Run, previousStatus: Option[RunStatus]) extends ExecutionHandle { override val isDone = false @@ -92,20 +90,20 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes override val pollingActor = jesBackendSingletonActor override lazy val pollBackOff = SimpleExponentialBackoff( - initialInterval = 30 seconds, maxInterval = 10 minutes, multiplier = 1.1) + initialInterval = 30 seconds, maxInterval = jesAttributes.maxPollingInterval seconds, multiplier = 1.1) override lazy val executeOrRecoverBackOff = SimpleExponentialBackoff( initialInterval = 3 seconds, maxInterval = 20 seconds, multiplier = 1.1) - private lazy val workflowDescriptor = jobDescriptor.workflowDescriptor + override lazy val workflowDescriptor = jobDescriptor.workflowDescriptor private lazy val call = jobDescriptor.key.call override lazy val retryable = jobDescriptor.key.attempt <= runtimeAttributes.preemptible private lazy val cmdInput = - JesFileInput(ExecParamName, jesCallPaths.gcsExecPath.toString, Paths.get(jesCallPaths.gcsExecFilename), workingDisk) + JesFileInput(ExecParamName, jesCallPaths.script.toUri.toString, Paths.get(jesCallPaths.scriptFilename), workingDisk) private lazy val jesCommandLine = s"/bin/bash ${cmdInput.containerPath}" - private lazy val rcJesOutput = JesFileOutput(returnCodeFilename, returnCodeGcsPath.toString, Paths.get(returnCodeFilename), workingDisk) + private lazy val rcJesOutput = JesFileOutput(returnCodeFilename, returnCodeGcsPath.toUri.toString, Paths.get(returnCodeFilename), workingDisk) private lazy val standardParameters = Seq(rcJesOutput) private lazy val returnCodeContents = Try(File(returnCodeGcsPath).contentAsString) @@ -129,26 +127,19 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes override def receive: Receive = pollingActorClientReceive orElse 
jesReceiveBehavior orElse super.receive - private def globOutputPath(glob: String) = callRootPath.resolve(s"glob-${glob.md5Sum}/") - private def gcsAuthParameter: Option[JesInput] = { - if (jesAttributes.gcsFilesystemAuth.requiresAuthFile || dockerConfiguration.isDefined) - Option(JesLiteralInput(ExtraConfigParamName, jesCallPaths.gcsAuthFilePath.toString)) + if (jesAttributes.auths.gcs.requiresAuthFile || dockerConfiguration.isDefined) + Option(JesLiteralInput(ExtraConfigParamName, jesCallPaths.gcsAuthFilePath.toUri.toString)) else None } private lazy val callContext = CallContext( callRootPath, - jesStdoutFile.toString, - jesStderrFile.toString + jesStdoutFile.toUri.toString, + jesStderrFile.toUri.toString ) - private[jes] lazy val callEngineFunctions = new JesExpressionFunctions(List(jesCallPaths.gcsFileSystem), callContext) - - private val lookup: ScopedLookupFunction = { - val declarations = workflowDescriptor.workflowNamespace.workflow.declarations ++ call.task.declarations - WdlExpression.standardLookupFunction(jobDescriptor.inputs, declarations, callEngineFunctions) - } + private[jes] lazy val callEngineFunctions = new JesExpressionFunctions(List(jesCallPaths.gcsPathBuilder), callContext) /** * Takes two arrays of remote and local WDL File paths and generates the necessary JesInputs. 
@@ -170,43 +161,28 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes * relativeLocalizationPath("gs://some/bucket/foo.txt") -> "some/bucket/foo.txt" */ private def relativeLocalizationPath(file: WdlFile): WdlFile = { - Try(getPath(file.value)) match { - case Success(gcsPath: NioGcsPath) => WdlFile(gcsPath.bucket + "/" + gcsPath.objectName, file.isGlob) - case Success(gcsPath) => file - case Failure(e) => file + getPath(file.value) match { + case Success(path) => + val value: WdlSource = path.toUri.getHost + path.toUri.getPath + WdlFile(value, file.isGlob) + case _ => file } } - private[jes] def generateJesInputs(jobDescriptor: BackendJobDescriptor): Iterable[JesInput] = { - /** - * Commands in WDL tasks can also generate input files. For example: ./my_exec --file=${write_lines(arr)} - * - * write_lines(arr) would produce a string-ified version of the array stored as a GCS path. The next block of code - * will go through each ${...} expression within the task's command section and find all write_*() ASTs and - * evaluate them so the files are written to GCS and the they can be included as inputs to Google's Pipeline object - */ - val commandExpressions = jobDescriptor.key.scope.task.commandTemplate.collect({ - case x: ParameterCommandPart => x.expression - }) - - val writeFunctionAsts = commandExpressions.map(_.ast).flatMap(x => AstTools.findAsts(x, "FunctionCall")).collect({ - case y if y.getAttribute("name").sourceString.startsWith("write_") => y - }) + private[jes] def generateJesInputs(jobDescriptor: BackendJobDescriptor): Set[JesInput] = { - val evaluatedExpressionMap = writeFunctionAsts map { ast => - val expression = WdlExpression(ast) - val value = expression.evaluate(lookup, callEngineFunctions) - expression.toWdlString.md5SumShort -> value - } toMap - - val writeFunctionFiles = evaluatedExpressionMap collect { case (k, v: Success[_]) => k -> v.get } collect { case (k, v: WdlFile) => k -> Seq(v)} + val writeFunctionFiles = 
call.task.evaluateFilesFromCommand(jobDescriptor.fullyQualifiedInputs, callEngineFunctions) map { + case (expression, file) => expression.toWdlString.md5SumShort -> Seq(file) + } - /** Collect all WdlFiles from inputs to the call */ - val callInputFiles: Map[FullyQualifiedName, Seq[WdlFile]] = jobDescriptor.inputs mapValues { _.collectAsSeq { case w: WdlFile => w } } + /* Collect all WdlFiles from inputs to the call */ + val callInputFiles: Map[FullyQualifiedName, Seq[WdlFile]] = jobDescriptor.fullyQualifiedInputs mapValues { _.collectAsSeq { case w: WdlFile => w } } - (callInputFiles ++ writeFunctionFiles) flatMap { + val inputs = (callInputFiles ++ writeFunctionFiles) flatMap { case (name, files) => jesInputsFromWdlFiles(name, files, files.map(relativeLocalizationPath), jobDescriptor) } + + inputs.toSet } /** @@ -236,33 +212,56 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes if (referenceName.length <= 127) referenceName else referenceName.md5Sum } - private[jes] def generateJesOutputs(jobDescriptor: BackendJobDescriptor): Seq[JesFileOutput] = { - val wdlFileOutputs = jobDescriptor.key.scope.task.outputs flatMap { taskOutput => - taskOutput.requiredExpression.evaluateFiles(lookup, NoFunctions, taskOutput.wdlType) match { - case Success(wdlFiles) => wdlFiles map relativeLocalizationPath - case Failure(ex) => - jobLogger.warn(s"Could not evaluate $taskOutput: ${ex.getMessage}", ex) - Seq.empty[WdlFile] - } + private[jes] def findGlobOutputs(jobDescriptor: BackendJobDescriptor): Set[WdlGlobFile] = { + val globOutputs = (call.task.findOutputFiles(jobDescriptor.fullyQualifiedInputs, NoFunctions) map relativeLocalizationPath) collect { + case glob: WdlGlobFile => glob } + globOutputs.distinct.toSet + } + + private[jes] def generateJesOutputs(jobDescriptor: BackendJobDescriptor): Set[JesFileOutput] = { + val wdlFileOutputs = call.task.findOutputFiles(jobDescriptor.fullyQualifiedInputs, NoFunctions) map relativeLocalizationPath - // 
Create the mappings. GLOB mappings require special treatment (i.e. stick everything matching the glob in a folder) - wdlFileOutputs.distinct map { wdlFile => - val destination = wdlFile match { - case WdlSingleFile(filePath) => callRootPath.resolve(filePath).toString - case WdlGlobFile(filePath) => globOutputPath(filePath).toString + val outputs = wdlFileOutputs.distinct flatMap { wdlFile => + wdlFile match { + case singleFile: WdlSingleFile => List(generateJesSingleFileOutputs(singleFile)) + case globFile: WdlGlobFile => generateJesGlobFileOutputs(globFile) } - val (relpath, disk) = relativePathAndAttachedDisk(wdlFile.value, runtimeAttributes.disks) - JesFileOutput(makeSafeJesReferenceName(wdlFile.value), destination, relpath, disk) } + + outputs.toSet + } + + private def generateJesSingleFileOutputs(wdlFile: WdlSingleFile): JesFileOutput = { + val destination = callRootPath.resolve(wdlFile.value.stripPrefix("/")).toUri.toString + val (relpath, disk) = relativePathAndAttachedDisk(wdlFile.value, runtimeAttributes.disks) + JesFileOutput(makeSafeJesReferenceName(wdlFile.value), destination, relpath, disk) + } + + private def generateJesGlobFileOutputs(wdlFile: WdlGlobFile): List[JesFileOutput] = { + val globName = callEngineFunctions.globName(wdlFile.value) + val globDirectory = globName + "/" + val globListFile = globName + ".list" + val gcsGlobDirectoryDestinationPath = callRootPath.resolve(globDirectory).toUri.toString + val gcsGlobListFileDestinationPath = callRootPath.resolve(globListFile).toUri.toString + + val (_, globDirectoryDisk) = relativePathAndAttachedDisk(wdlFile.value, runtimeAttributes.disks) + + // We need both the glob directory and the glob list: + List( + // The glob directory: + JesFileOutput(makeSafeJesReferenceName(globDirectory), gcsGlobDirectoryDestinationPath, Paths.get(globDirectory + "*"), globDirectoryDisk), + // The glob list file: + JesFileOutput(makeSafeJesReferenceName(globListFile), gcsGlobListFileDestinationPath, 
Paths.get(globListFile), globDirectoryDisk) + ) } private def instantiateCommand: Try[String] = { - val backendInputs = jobDescriptor.inputs mapValues gcsPathToLocal - jobDescriptor.call.instantiateCommandLine(backendInputs, callEngineFunctions, gcsPathToLocal) + val backendInputs = jobDescriptor.inputDeclarations mapValues gcsPathToLocal + jobDescriptor.call.task.instantiateCommand(backendInputs, callEngineFunctions, valueMapper = gcsPathToLocal) } - private def uploadCommandScript(command: String, withMonitoring: Boolean): Future[Unit] = { + private def uploadCommandScript(command: String, withMonitoring: Boolean, globFiles: Set[WdlGlobFile]): Future[Unit] = { val monitoring = if (withMonitoring) { s"""|touch $JesMonitoringLogFile |chmod u+x $JesMonitoringScript @@ -272,6 +271,22 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes val tmpDir = File(JesWorkingDisk.MountPoint)./("tmp").path val rcPath = File(JesWorkingDisk.MountPoint)./(returnCodeFilename).path + def globManipulation(globFile: WdlGlobFile) = { + + val globDir = callEngineFunctions.globName(globFile.value) + val (_, disk) = relativePathAndAttachedDisk(globFile.value, runtimeAttributes.disks) + val globDirectory = Paths.get(s"${disk.mountPoint.toAbsolutePath}/$globDir/") + val globList = Paths.get(s"${disk.mountPoint.toAbsolutePath}/$globDir.list") + + s""" + |mkdir $globDirectory + |ln ${globFile.value} $globDirectory + |ls -1 $globDirectory > $globList + """.stripMargin + } + + val globManipulations = globFiles.map(globManipulation).mkString("\n") + val fileContent = s""" |#!/bin/bash @@ -281,35 +296,34 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes |( |cd ${JesWorkingDisk.MountPoint} |$command + |$globManipulations |) |echo $$? 
> $rcPath """.stripMargin.trim - def writeScript(): Future[Unit] = Future { File(jesCallPaths.gcsExecPath).write(fileContent) } void - - implicit val system = context.system - Retry.withRetry( - writeScript, - isTransient = isTransientJesException, - isFatal = isFatalJesException - ) + Future(File(jesCallPaths.script).write(fileContent)) void } private def googleProject(descriptor: BackendWorkflowDescriptor): String = { descriptor.workflowOptions.getOrElse(WorkflowOptionKeys.GoogleProject, jesAttributes.project) } + private def computeServiceAccount(descriptor: BackendWorkflowDescriptor): String = { + descriptor.workflowOptions.getOrElse(WorkflowOptionKeys.GoogleComputeServiceAccount, jesAttributes.computeServiceAccount) + } + private def createJesRun(jesParameters: Seq[JesParameter], runIdForResumption: Option[String]): Future[Run] = { def createRun() = Future(Run( runIdForResumption, jobDescriptor = jobDescriptor, runtimeAttributes = runtimeAttributes, - callRootPath = callRootPath.toString, + callRootPath = callRootPath.toUri.toString, commandLine = jesCommandLine, logFileName = jesLogFilename, jesParameters, googleProject(jobDescriptor.workflowDescriptor), + computeServiceAccount(jobDescriptor.workflowDescriptor), retryable, initializationData.genomics )) @@ -332,8 +346,8 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes } protected def runWithJes(command: String, - jesInputs: Seq[JesInput], - jesOutputs: Seq[JesFileOutput], + jesInputs: Set[JesInput], + jesOutputs: Set[JesFileOutput], runIdForResumption: Option[String], withMonitoring: Boolean): Future[ExecutionHandle] = { @@ -342,7 +356,7 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes val jesParameters = standardParameters ++ gcsAuthParameter ++ jesInputs ++ jesOutputs val jesJobSetup = for { - _ <- uploadCommandScript(command, withMonitoring) + _ <- uploadCommandScript(command, withMonitoring, findGlobOutputs(jobDescriptor)) run <- 
createJesRun(jesParameters, runIdForResumption) _ = tellMetadata(Map(CallMetadataKeys.JobId -> run.runId)) } yield run @@ -359,8 +373,8 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes } private def startExecuting(monitoringOutput: Option[JesFileOutput], mode: ExecutionMode): Future[ExecutionHandle] = { - val jesInputs: Seq[JesInput] = generateJesInputs(jobDescriptor).toSeq ++ monitoringScript :+ cmdInput - val jesOutputs: Seq[JesFileOutput] = generateJesOutputs(jobDescriptor) ++ monitoringOutput + val jesInputs: Set[JesInput] = generateJesInputs(jobDescriptor) ++ monitoringScript + cmdInput + val jesOutputs: Set[JesFileOutput] = generateJesOutputs(jobDescriptor) ++ monitoringOutput instantiateCommand match { case Success(command) => runWithJes(command, jesInputs, jesOutputs, mode.jobId.collectFirst { case j: JesJobId => j.operationId }, monitoringScript.isDefined) @@ -425,29 +439,7 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes /** * Fire and forget start info to the metadata service */ - private def tellStartMetadata(): Unit = { - val runtimeAttributesMetadata: Map[String, Any] = runtimeAttributes.asMap map { - case (key, value) => s"runtimeAttributes:$key" -> value - } - - var fileMetadata: Map[String, Any] = jesCallPaths.metadataPaths - if (monitoringOutput.nonEmpty) { - // TODO: Move this to JesCallPaths - fileMetadata += JesMetadataKeys.MonitoringLog -> monitoringOutput.get.gcs - } - - val otherMetadata: Map[String, Any] = Map( - JesMetadataKeys.GoogleProject -> jesAttributes.project, - JesMetadataKeys.ExecutionBucket -> jesAttributes.executionBucket, - JesMetadataKeys.EndpointUrl -> jesAttributes.endpointUrl, - "preemptible" -> preemptible, - "cache:allowResultReuse" -> true - ) - - val metadataKeyValues = runtimeAttributesMetadata ++ fileMetadata ++ otherMetadata - - tellMetadata(metadataKeyValues) - } + private def tellStartMetadata() = tellMetadata(metadataKeyValues) /** * Fire and 
forget info to the metadata service @@ -457,59 +449,32 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes serviceRegistryActor.putMetadata(jobDescriptor.workflowDescriptor.id, Option(jobDescriptor.key), metadataKeyValues) } - private def customLookupFunction(alreadyGeneratedOutputs: Map[String, WdlValue])(toBeLookedUp: String): WdlValue = alreadyGeneratedOutputs.getOrElse(toBeLookedUp, lookup(toBeLookedUp)) - - private[jes] def wdlValueToGcsPath(jesOutputs: Seq[JesFileOutput])(value: WdlValue): WdlValue = { + private[jes] def wdlValueToGcsPath(jesOutputs: Set[JesFileOutput])(value: WdlValue): WdlValue = { def toGcsPath(wdlFile: WdlFile) = jesOutputs collectFirst { case o if o.name == makeSafeJesReferenceName(wdlFile.valueString) => WdlFile(o.gcs) } getOrElse value + value match { case wdlArray: WdlArray => wdlArray map wdlValueToGcsPath(jesOutputs) case wdlMap: WdlMap => wdlMap map { case (k, v) => wdlValueToGcsPath(jesOutputs)(k) -> wdlValueToGcsPath(jesOutputs)(v) } - case file: WdlFile => if (file.value.isGcsUrl) file else toGcsPath(file) + case file: WdlFile => toGcsPath(file) case other => other } } - private def outputLookup(taskOutput: TaskOutput, currentList: Seq[AttemptedLookupResult]) = for { - /** - * This will evaluate the task output expression and coerces it to the task output's type. - * If the result is a WdlFile, then attempt to find the JesOutput with the same path and - * return a WdlFile that represents the GCS path and not the local path. For example, - * - *
-    * output {
-    *   File x = "out" + ".txt"
-    * }
-    * 
- * - * "out" + ".txt" is evaluated to WdlString("out.txt") and then coerced into a WdlFile("out.txt") - * Then, via wdlFileToGcsPath(), we attempt to find the JesOutput with .name == "out.txt". - * If it is found, then WdlFile("gs://some_bucket/out.txt") will be returned. - */ - wdlValue <- taskOutput.requiredExpression.evaluate(customLookupFunction(currentList.toLookupMap), callEngineFunctions) - coercedValue <- taskOutput.wdlType.coerceRawValue(wdlValue) - value = wdlValueToGcsPath(generateJesOutputs(jobDescriptor))(coercedValue) - } yield value + private def postProcess: Try[CallOutputs] = { + def wdlValueToSuccess(value: WdlValue): Try[WdlValue] = Success(value) - - private def outputFoldingFunction: (Seq[AttemptedLookupResult], TaskOutput) => Seq[AttemptedLookupResult] = { - (currentList: Seq[AttemptedLookupResult], taskOutput: TaskOutput) => { - currentList ++ Seq(AttemptedLookupResult(taskOutput.name, outputLookup(taskOutput, currentList))) - } - } - - private def postProcess: Try[JobOutputs] = { - val outputs = call.task.outputs - val outputMappings = outputs.foldLeft(Seq.empty[AttemptedLookupResult])(outputFoldingFunction).map(_.toPair).toMap - TryUtil.sequenceMap(outputMappings) map { outputMap => - outputMap mapValues { v => JobOutput(v) } - } + OutputEvaluator.evaluateOutputs( + jobDescriptor, + callEngineFunctions, + (wdlValueToSuccess _).compose(wdlValueToGcsPath(generateJesOutputs(jobDescriptor))) + ) } - private def handleSuccess(outputMappings: Try[JobOutputs], returnCode: Int, jobDetritusFiles: Map[String, String], executionHandle: ExecutionHandle, events: Seq[ExecutionEvent]): ExecutionHandle = { + private def handleSuccess(outputMappings: Try[CallOutputs], returnCode: Int, jobDetritusFiles: Map[String, Path], executionHandle: ExecutionHandle, events: Seq[ExecutionEvent]): ExecutionHandle = { outputMappings match { case Success(outputs) => SuccessfulExecutionHandle(outputs, returnCode, jobDetritusFiles, events) case Failure(ex: 
CromwellAggregatedException) if ex.throwables collectFirst { case s: SocketTimeoutException => s } isDefined => @@ -588,10 +553,10 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes FailedNonRetryableExecutionHandle(new RuntimeException( s"execution failed: could not parse return code as integer: ${returnCodeContents.get}")).future case _: RunStatus.Success if !continueOnReturnCode.continueFor(returnCode.get) => - val badReturnCodeMessage = s"Call ${call.fullyQualifiedName}: return code was ${returnCode.getOrElse("(none)")}" + val badReturnCodeMessage = s"Call ${jobDescriptor.key.tag}: return code was ${returnCode.getOrElse("(none)")}" FailedNonRetryableExecutionHandle(new RuntimeException(badReturnCodeMessage), returnCode.toOption).future case success: RunStatus.Success => - handleSuccess(postProcess, returnCode.get, jesCallPaths.detritusPaths.mapValues(_.toString), handle, success.eventList).future + handleSuccess(postProcess, returnCode.get, jesCallPaths.detritusPaths, handle, success.eventList).future case RunStatus.Failed(errorCode, errorMessage, _, _, _, _) => handleFailure(errorCode, errorMessage) } } catch { @@ -611,8 +576,8 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes * @param gcsPath The input path * @return A path which is unique per input path */ - private def localFilePathFromCloudStoragePath(mountPoint: Path, gcsPath: NioGcsPath): Path = { - mountPoint.resolve(gcsPath.bucket).resolve(gcsPath.objectName) + private def localFilePathFromCloudStoragePath(mountPoint: Path, gcsPath: CloudStoragePath): Path = { + mountPoint.resolve(gcsPath.bucket()).resolve(gcsPath.toUri.getPath.stripPrefix("/")) } /** @@ -625,11 +590,14 @@ class JesAsyncBackendJobExecutionActor(override val jobDescriptor: BackendJobDes private[jes] def gcsPathToLocal(wdlValue: WdlValue): WdlValue = { wdlValue match { case wdlFile: WdlFile => - Try(getPath(wdlFile.valueString)) match { - case Success(gcsPath: NioGcsPath) 
=> + getPath(wdlFile.valueString) match { + case Success(gcsPath: CloudStoragePath) => WdlFile(localFilePathFromCloudStoragePath(workingDisk.mountPoint, gcsPath).toString, wdlFile.isGlob) - case Success(otherPath) => wdlValue - case Failure(e) => wdlValue + case Success(proxy: PathProxy) => + proxy.unbox(classOf[CloudStoragePath]) map { gcsPath => + WdlFile(localFilePathFromCloudStoragePath(workingDisk.mountPoint, gcsPath).toString, wdlFile.isGlob) + } getOrElse wdlValue + case _ => wdlValue } case wdlArray: WdlArray => wdlArray map gcsPathToLocal case wdlMap: WdlMap => wdlMap map { case (k, v) => gcsPathToLocal(k) -> gcsPathToLocal(v) } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAttributes.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAttributes.scala index 2e1d00b0e..ac2475e64 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAttributes.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesAttributes.scala @@ -6,30 +6,29 @@ import cats.data._ import cats.data.Validated._ import cats.syntax.cartesian._ import com.typesafe.config.Config -import cromwell.backend.impl.jes.JesImplicits.GoogleAuthWorkflowOptions -import cromwell.core.WorkflowOptions -import cromwell.filesystems.gcs.{GoogleAuthMode, GoogleConfiguration} +import cromwell.backend.impl.jes.authentication.JesAuths +import cromwell.core.ErrorOr._ +import cromwell.filesystems.gcs.GoogleConfiguration import lenthall.config.ValidatedConfig._ import net.ceedubs.ficus.Ficus._ -import cromwell.core.ErrorOr._ import wdl4s.ExceptionWithErrors case class JesAttributes(project: String, - genomicsAuth: GoogleAuthMode, - gcsFilesystemAuth: GoogleAuthMode, + computeServiceAccount: String, + auths: JesAuths, executionBucket: String, endpointUrl: URL, - maxPollingInterval: Int) { - def genomicsCredential(options: WorkflowOptions) = genomicsAuth.credential(options.toGoogleAuthOptions) - def 
gcsCredential(options: WorkflowOptions) = gcsFilesystemAuth.credential(options.toGoogleAuthOptions) -} + maxPollingInterval: Int, + qps: Int) object JesAttributes { + val GenomicsApiDefaultQps = 1000 private val jesKeys = Set( "project", "root", "maximum-polling-interval", + "compute-service-account", "dockerhub", "genomics", "filesystems", @@ -47,14 +46,17 @@ object JesAttributes { val executionBucket: ValidatedNel[String, String] = backendConfig.validateString("root") val endpointUrl: ErrorOr[URL] = backendConfig.validateURL("genomics.endpoint-url") val maxPollingInterval: Int = backendConfig.as[Option[Int]]("maximum-polling-interval").getOrElse(600) + val computeServiceAccount: String = backendConfig.as[Option[String]]("genomics.compute-service-account").getOrElse("default") val genomicsAuthName: ErrorOr[String] = backendConfig.validateString("genomics.auth") val gcsFilesystemAuthName: ErrorOr[String] = backendConfig.validateString("filesystems.gcs.auth") + val qps = backendConfig.as[Option[Int]]("genomics-api-queries-per-100-seconds").getOrElse(GenomicsApiDefaultQps) / 100 + (project |@| executionBucket |@| endpointUrl |@| genomicsAuthName |@| gcsFilesystemAuthName) map { (_, _, _, _, _) } flatMap { case (p, b, u, genomicsName, gcsName) => (googleConfig.auth(genomicsName) |@| googleConfig.auth(gcsName)) map { case (genomicsAuth, gcsAuth) => - JesAttributes(p, genomicsAuth, gcsAuth, b, u, maxPollingInterval) + JesAttributes(p, computeServiceAccount, JesAuths(genomicsAuth, gcsAuth), b, u, maxPollingInterval, qps) } } match { case Valid(r) => r diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala index 92f653c10..f61769379 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala +++ 
b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendLifecycleActorFactory.scala @@ -8,9 +8,9 @@ import cromwell.backend._ import cromwell.backend.callcaching.FileHashingActor.FileHashingFunction import cromwell.backend.impl.jes.callcaching.JesBackendFileHashing import cromwell.backend.validation.RuntimeAttributesKeys +import cromwell.core.CallOutputs import cromwell.core.Dispatcher.BackendDispatcher -import cromwell.core.{ExecutionStore, OutputStore} -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.expression.WdlStandardLibraryFunctions @@ -21,7 +21,7 @@ case class JesBackendLifecycleActorFactory(name: String, configurationDescriptor val jesConfiguration = new JesConfiguration(configurationDescriptor) override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = { Option(JesInitializationActor.props(workflowDescriptor, calls, jesConfiguration, serviceRegistryActor).withDispatcher(BackendDispatcher)) } @@ -46,15 +46,15 @@ case class JesBackendLifecycleActorFactory(name: String, configurationDescriptor } override def workflowFinalizationActorProps(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], - executionStore: ExecutionStore, - outputStore: OutputStore, + calls: Set[TaskCall], + jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, initializationData: Option[BackendInitializationData]) = { // The `JesInitializationActor` will only return a non-`Empty` `JesBackendInitializationData` from a successful `beforeAll` // invocation. HOWEVER, the finalization actor is created regardless of whether workflow initialization was successful // or not. So the finalization actor must be able to handle an empty `JesBackendInitializationData` option, and there is no // `.get` on the initialization data as there is with the execution or cache hit copying actor methods. 
- Option(JesFinalizationActor.props(workflowDescriptor, calls, jesConfiguration, executionStore, outputStore, initializationData.toJes).withDispatcher(BackendDispatcher)) + Option(JesFinalizationActor.props(workflowDescriptor, calls, jesConfiguration, jobExecutionMap, workflowOutputs, initializationData.toJes).withDispatcher(BackendDispatcher)) } override def runtimeAttributeDefinitions(initializationDataOption: Option[BackendInitializationData]) = staticRuntimeAttributeDefinitions @@ -63,16 +63,21 @@ case class JesBackendLifecycleActorFactory(name: String, configurationDescriptor jobKey: BackendJobDescriptorKey, initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = { - val jesCallPaths = initializationData.toJes.get.workflowPaths.toJesCallPaths(jobKey) - new JesExpressionFunctions(List(jesCallPaths.gcsFileSystem), jesCallPaths.callContext) + val jesCallPaths = initializationData.toJes.get.workflowPaths.toJobPaths(jobKey) + new JesExpressionFunctions(List(jesCallPaths.gcsPathBuilder), jesCallPaths.callContext) } override def getExecutionRootPath(workflowDescriptor: BackendWorkflowDescriptor, backendConfig: Config, initializationData: Option[BackendInitializationData]): Path = { - initializationData.toJes.get.workflowPaths.rootPath + initializationData.toJes.get.workflowPaths.executionRoot } - override def backendSingletonActorProps = Option(JesBackendSingletonActor.props()) + override def getWorkflowExecutionRootPath(workflowDescriptor: BackendWorkflowDescriptor, backendConfig: Config, + initializationData: Option[BackendInitializationData]): Path = { + initializationData.toJes.get.workflowPaths.workflowRoot + } + + override def backendSingletonActorProps = Option(JesBackendSingletonActor.props(jesConfiguration.qps)) override lazy val fileHashingFunction: Option[FileHashingFunction] = Option(FileHashingFunction(JesBackendFileHashing.getCrc32c)) } @@ -86,8 +91,8 @@ object JesBackendLifecycleActorFactory { } val 
staticRuntimeAttributeDefinitions = { - import RuntimeAttributesKeys._ import JesRuntimeAttributes._ + import RuntimeAttributesKeys._ Set( RuntimeAttributeDefinition(DockerKey, None, usedInCallCaching = true), diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendSingletonActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendSingletonActor.scala index 3b830107a..5c1275ae1 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendSingletonActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesBackendSingletonActor.scala @@ -1,12 +1,12 @@ package cromwell.backend.impl.jes import akka.actor.{Actor, ActorLogging, Props} -import cromwell.backend.impl.jes.statuspolling.{JesApiQueryManager} +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.DoPoll -class JesBackendSingletonActor extends Actor with ActorLogging { +final case class JesBackendSingletonActor(qps: Int) extends Actor with ActorLogging { - val pollingActor = context.actorOf(JesApiQueryManager.props) + val pollingActor = context.actorOf(JesApiQueryManager.props(qps)) override def receive = { case poll: DoPoll => @@ -16,5 +16,5 @@ class JesBackendSingletonActor extends Actor with ActorLogging { } object JesBackendSingletonActor { - def props(): Props = Props(new JesBackendSingletonActor()) + def props(qps: Int): Props = Props(JesBackendSingletonActor(qps)) } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCacheHitCopyingActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCacheHitCopyingActor.scala index 078e2e081..a77dd66c7 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCacheHitCopyingActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCacheHitCopyingActor.scala @@ -5,7 +5,7 @@ import 
java.nio.file.Path import akka.actor.{ActorRef, Props} import cromwell.backend.callcaching.CacheHitDuplicating import cromwell.backend.{BackendCacheHitCopyingActor, BackendJobDescriptor} -import cromwell.core.PathCopier +import cromwell.core.path.PathCopier import cromwell.core.logging.JobLogging case class JesCacheHitCopyingActor(override val jobDescriptor: BackendJobDescriptor, @@ -15,9 +15,11 @@ case class JesCacheHitCopyingActor(override val jobDescriptor: BackendJobDescrip extends BackendCacheHitCopyingActor with CacheHitDuplicating with JesJobCachingActorHelper with JobLogging { override protected def duplicate(source: Path, destination: Path) = PathCopier.copy(source, destination).get - override protected def destinationCallRootPath = jesCallPaths.callRootPath + override protected def destinationCallRootPath = jesCallPaths.callExecutionRoot override protected def destinationJobDetritusPaths = jesCallPaths.detritusPaths + + override val workflowDescriptor = jobDescriptor.workflowDescriptor } object JesCacheHitCopyingActor { diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCallPaths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCallPaths.scala deleted file mode 100644 index f18daecaa..000000000 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesCallPaths.scala +++ /dev/null @@ -1,82 +0,0 @@ -package cromwell.backend.impl.jes - -import java.nio.file.Path - -import cromwell.backend.impl.jes.authentication.JesCredentials -import cromwell.backend.io.JobPaths -import cromwell.backend.io.JobPaths._ -import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} -import cromwell.core.CallContext -import cromwell.services.metadata.CallMetadataKeys - -import scala.concurrent.ExecutionContext - -object JesCallPaths { - def apply(jobKey: BackendJobDescriptorKey, workflowDescriptor: BackendWorkflowDescriptor, - jesConfiguration: JesConfiguration, - credentials: 
JesCredentials)(implicit ec: ExecutionContext): JesCallPaths = { - new JesCallPaths(jobKey, workflowDescriptor, jesConfiguration, credentials) - } - - val JesLogPathKey = "jesLog" - val GcsExecPathKey = "gcsExec" -} - -class JesCallPaths(jobKey: BackendJobDescriptorKey, workflowDescriptor: BackendWorkflowDescriptor, - jesConfiguration: JesConfiguration, - credentials: JesCredentials)(implicit ec: ExecutionContext) extends - JesWorkflowPaths(workflowDescriptor, jesConfiguration, credentials)(ec) { - - val jesLogBasename = { - val index = jobKey.index.map(s => s"-$s").getOrElse("") - s"${jobKey.scope.unqualifiedName}$index" - } - - val callRootPath: Path = { - val callName = jobKey.call.fullyQualifiedName.split('.').last - val call = s"$CallPrefix-$callName" - val shard = jobKey.index map { s => s"$ShardPrefix-$s" } getOrElse "" - val retry = if (jobKey.attempt > 1) s"$AttemptPrefix-${jobKey.attempt}" else "" - - List(call, shard, retry).foldLeft(workflowRootPath)((path, dir) => path.resolve(dir)) - } - - val returnCodeFilename: String = s"$jesLogBasename-rc.txt" - val stdoutFilename: String = s"$jesLogBasename-stdout.log" - val stderrFilename: String = s"$jesLogBasename-stderr.log" - val jesLogFilename: String = s"$jesLogBasename.log" - val gcsExecFilename: String = "exec.sh" - - lazy val returnCodePath: Path = callRootPath.resolve(returnCodeFilename) - lazy val stdoutPath: Path = callRootPath.resolve(stdoutFilename) - lazy val stderrPath: Path = callRootPath.resolve(stderrFilename) - lazy val jesLogPath: Path = callRootPath.resolve(jesLogFilename) - lazy val gcsExecPath: Path = callRootPath.resolve(gcsExecFilename) - lazy val callContext = CallContext(callRootPath, stdoutFilename, stderrFilename) - - /* - TODO: Move various monitoring files path generation here. - - "/cromwell_root" is a well known path, called in the regular JobPaths callDockerRoot. - This JesCallPaths should know about that root, and be able to create the monitoring file paths. 
- Instead of the AsyncActor creating the paths, the paths could then be shared with the CachingActor. - - Those monitoring paths could then be returned by metadataFiles and detritusFiles. - */ - - lazy val metadataPaths: Map[String, Path] = Map( - CallMetadataKeys.CallRoot -> callRootPath, - CallMetadataKeys.Stdout -> stdoutPath, - CallMetadataKeys.Stderr -> stderrPath, - CallMetadataKeys.BackendLogsPrefix + ":log" -> jesLogPath - ) - - lazy val detritusPaths: Map[String, Path] = Map( - JobPaths.CallRootPathKey -> callRootPath, - JesCallPaths.GcsExecPathKey -> gcsExecPath, - JesCallPaths.JesLogPathKey -> jesLogPath, - JobPaths.StdoutPathKey -> stdoutPath, - JobPaths.StdErrPathKey -> stderrPath, - JobPaths.ReturnCodePathKey -> returnCodePath - ) -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala index 6657250b0..20ec8130d 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesConfiguration.scala @@ -2,14 +2,36 @@ package cromwell.backend.impl.jes import cromwell.backend.BackendConfigurationDescriptor import cromwell.backend.impl.jes.authentication.JesDockerCredentials +import cromwell.backend.impl.jes.io._ import cromwell.core.DockerConfiguration -import cromwell.filesystems.gcs.GoogleConfiguration +import cromwell.core.path.CustomRetryParams +import cromwell.core.retry.SimpleExponentialBackoff +import cromwell.filesystems.gcs.{GoogleConfiguration, RetryableGcsPathBuilderFactory} + +import scala.concurrent.duration._ +import scala.language.postfixOps + +object JesConfiguration { + val GcsRetryParams = CustomRetryParams( + timeout = Duration.Inf, + maxRetries = Option(3), + backoff = SimpleExponentialBackoff(1 seconds, 3 seconds, 1.5D), + isTransient = isTransientJesException, + isFatal = isFatalJesException + ) +} 
class JesConfiguration(val configurationDescriptor: BackendConfigurationDescriptor) { + private val googleConfig = GoogleConfiguration(configurationDescriptor.globalConfig) + val root = configurationDescriptor.backendConfig.getString("root") - val googleConfig = GoogleConfiguration(configurationDescriptor.globalConfig) val jesAttributes = JesAttributes(googleConfig, configurationDescriptor.backendConfig) + val jesAuths = jesAttributes.auths + val jesComputeServiceAccount = jesAttributes.computeServiceAccount + val gcsPathBuilderFactory = RetryableGcsPathBuilderFactory(jesAuths.gcs, customRetryParams = JesConfiguration.GcsRetryParams) + val genomicsFactory = GenomicsFactory(googleConfig.applicationName, jesAuths.genomics, jesAttributes.endpointUrl) val dockerCredentials = DockerConfiguration.build(configurationDescriptor.backendConfig).dockerCredentials map JesDockerCredentials.apply - val needAuthFileUpload = jesAttributes.gcsFilesystemAuth.requiresAuthFile || dockerCredentials.isDefined + val needAuthFileUpload = jesAuths.gcs.requiresAuthFile || dockerCredentials.isDefined + val qps = jesAttributes.qps } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesExpressionFunctions.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesExpressionFunctions.scala index 823108ca8..bf29a387f 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesExpressionFunctions.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesExpressionFunctions.scala @@ -1,42 +1,38 @@ package cromwell.backend.impl.jes -import java.nio.file.{FileSystem, Path} +import java.nio.file.{Files, Path} -import better.files._ -import cromwell.backend.wdl.{PureFunctions, ReadLikeFunctions, WriteFunctions} -import cromwell.backend.impl.jes.JesImplicits.PathString +import cromwell.backend.wdl.{ReadLikeFunctions, WriteFunctions} import cromwell.core.CallContext -import cromwell.filesystems.gcs.GcsFileSystem -import 
wdl4s.expression.WdlStandardLibraryFunctions +import cromwell.core.path.PathBuilder +import cromwell.filesystems.gcs.GcsPathBuilder +import wdl4s.expression.{PureStandardLibraryFunctionsLike, WdlStandardLibraryFunctions} import wdl4s.values._ -import scala.language.postfixOps +import scala.collection.JavaConverters._ import scala.util.{Success, Try} -class JesExpressionFunctions(override val fileSystems: List[FileSystem], - context: CallContext - ) extends WdlStandardLibraryFunctions with PureFunctions with ReadLikeFunctions with WriteFunctions { - import JesExpressionFunctions.EnhancedPath +class JesExpressionFunctions(override val pathBuilders: List[PathBuilder], context: CallContext) + extends WdlStandardLibraryFunctions with PureStandardLibraryFunctionsLike with ReadLikeFunctions with WriteFunctions { - private def globDirectory(glob: String): String = s"glob-${glob.md5Sum}/" + override def writeTempFile(path: String, prefix: String, suffix: String, content: String): String = super[WriteFunctions].writeTempFile(path, prefix, suffix, content) + private[jes] def globDirectory(glob: String): String = globName(glob) + "/" + private[jes] def globName(glob: String) = s"glob-${glob.md5Sum}" override def globPath(glob: String): String = context.root.resolve(globDirectory(glob)).toString override def glob(path: String, pattern: String): Seq[String] = { - File(path.toAbsolutePath(fileSystems).asDirectory). 
- glob("**/*") map { _.pathAsString } filterNot { _.toString == path } toSeq + val name = globName(pattern) + val listFile = context.root.resolve(s"$name.list").toRealPath() + Files.readAllLines(listFile).asScala map { fileName => context.root.resolve(s"$name/$fileName").toUri.toString } } - override def preMapping(str: String): String = if (!GcsFileSystem.isAbsoluteGcsPath(str)) context.root.resolve(str).toString else str + override def preMapping(str: String): String = if (!GcsPathBuilder.isValidGcsUrl(str)) { + context.root.resolve(str.stripPrefix("/")).toUri.toString + } else str override def stdout(params: Seq[Try[WdlValue]]) = Success(WdlFile(context.stdout)) override def stderr(params: Seq[Try[WdlValue]]) = Success(WdlFile(context.stderr)) override val writeDirectory: Path = context.root } - -object JesExpressionFunctions { - implicit class EnhancedPath(val path: Path) extends AnyVal { - def asDirectory = path.toString.toDirectory(path.getFileSystem) - } -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala index 038c615dd..fc2d141be 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesFinalizationActor.scala @@ -6,25 +6,26 @@ import akka.actor.Props import better.files._ import cats.instances.future._ import cats.syntax.functor._ -import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor, BackendWorkflowFinalizationActor} +import cromwell.backend.{BackendWorkflowDescriptor, BackendWorkflowFinalizationActor, JobExecutionMap} +import cromwell.core.CallOutputs import cromwell.core.Dispatcher.IoDispatcher -import cromwell.core.{ExecutionStore, OutputStore, PathCopier} -import wdl4s.Call +import cromwell.core.path.PathCopier +import wdl4s.TaskCall import scala.concurrent.Future 
import scala.language.postfixOps object JesFinalizationActor { - def props(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], jesConfiguration: JesConfiguration, - executionStore: ExecutionStore, outputStore: OutputStore, initializationData: Option[JesBackendInitializationData]) = { - Props(new JesFinalizationActor(workflowDescriptor, calls, jesConfiguration, executionStore, outputStore, initializationData)) + def props(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], jesConfiguration: JesConfiguration, + jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationData: Option[JesBackendInitializationData]) = { + Props(new JesFinalizationActor(workflowDescriptor, calls, jesConfiguration, jobExecutionMap, workflowOutputs, initializationData)) } } class JesFinalizationActor (override val workflowDescriptor: BackendWorkflowDescriptor, - override val calls: Seq[Call], - jesConfiguration: JesConfiguration, executionStore: ExecutionStore, - outputStore: OutputStore, + override val calls: Set[TaskCall], + jesConfiguration: JesConfiguration, jobExecutionMap: JobExecutionMap, + workflowOutputs: CallOutputs, initializationData: Option[JesBackendInitializationData]) extends BackendWorkflowFinalizationActor { override val configurationDescriptor = jesConfiguration.configurationDescriptor @@ -68,19 +69,19 @@ class JesFinalizationActor (override val workflowDescriptor: BackendWorkflowDesc } private lazy val logPaths: Seq[Path] = { - val allCallPaths = executionStore.store.toSeq collect { - case (backendJobDescriptorKey: BackendJobDescriptorKey, _) => - initializationData map { _.workflowPaths.toJesCallPaths(backendJobDescriptorKey) } + val allCallPaths = jobExecutionMap flatMap { + case (backendJobDescriptor, keys) => + keys map { JesWorkflowPaths(backendJobDescriptor, jesConfiguration)(context.system).toJobPaths(_) } } - allCallPaths.flatten flatMap { callPaths => - Seq(callPaths.stdoutPath, callPaths.stderrPath, 
callPaths.jesLogPath) + allCallPaths.toSeq flatMap { callPaths => + Seq(callPaths.stdout, callPaths.stderr, callPaths.jesLogPath) } } private def copyLogs(callLogsDirPath: Path, logPaths: Seq[Path]): Unit = { workflowPaths match { - case Some(paths) => logPaths.foreach(PathCopier.copy(paths.rootPath, _, callLogsDirPath)) + case Some(paths) => logPaths.foreach(PathCopier.copy(paths.executionRoot, _, callLogsDirPath)) case None => } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesImplicits.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesImplicits.scala deleted file mode 100644 index 6c722c756..000000000 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesImplicits.scala +++ /dev/null @@ -1,41 +0,0 @@ -package cromwell.backend.impl.jes - -import java.nio.file.{FileSystem, Path} - -import cromwell.core.{PathFactory, WorkflowOptions} -import cromwell.filesystems.gcs.GoogleAuthMode.GoogleAuthOptions -import cromwell.filesystems.gcs.{GcsFileSystem, GoogleAuthMode} - -import scala.util.Try - -object JesImplicits { - implicit class GoogleAuthWorkflowOptions(val workflowOptions: WorkflowOptions) extends AnyVal { - def toGoogleAuthOptions: GoogleAuthMode.GoogleAuthOptions = new GoogleAuthOptions { - override def get(key: String): Try[String] = workflowOptions.get(key) - } - } - - object PathBuilder extends PathFactory - - implicit class PathString(val str: String) extends AnyVal { - def isGcsUrl: Boolean = str.startsWith("gs://") - def isUriWithProtocol: Boolean = "^[a-z]+://".r.findFirstIn(str).nonEmpty - - def toPath(fss: List[FileSystem]): Path = PathBuilder.buildPath(str, fss) - def toPath(fs: FileSystem): Path = str.toPath(List(fs)) - - def toAbsolutePath(fss: List[FileSystem]): Path = str.toPath(fss).toAbsolutePath - def toAbsolutePath(fs: FileSystem): Path = str.toAbsolutePath(List(fs)) - - def toDirectory(fss: List[FileSystem]): Path = buildPathAsDirectory(str, fss) - def toDirectory(fs: 
FileSystem): Path = str.toDirectory(List(fs)) - - // TODO this needs to go away because it's gcs specific. Replacing gcs FS with google implementation (when available) will take care of it - private def buildPathAsDirectory(rawString: String, fileSystems: List[FileSystem]): Path = { - PathBuilder.findFileSystem(rawString, fileSystems, { - case fs: GcsFileSystem => Try(fs.getPathAsDirectory(rawString)) - case fs => Try(fs.getPath(rawString)) - }) - } - } -} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala index e76a62e9d..03cae0dff 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesInitializationActor.scala @@ -7,21 +7,20 @@ import cats.instances.future._ import cats.syntax.functor._ import com.google.api.services.genomics.Genomics import cromwell.backend.impl.jes.JesInitializationActor._ -import cromwell.backend.impl.jes.authentication.{GcsLocalizing, JesAuthInformation, JesCredentials} +import cromwell.backend.impl.jes.authentication.{GcsLocalizing, JesAuthInformation} import cromwell.backend.impl.jes.io._ import cromwell.backend.validation.RuntimeAttributesDefault import cromwell.backend.validation.RuntimeAttributesKeys._ import cromwell.backend.{BackendInitializationData, BackendWorkflowDescriptor, BackendWorkflowInitializationActor} -import cromwell.core.Dispatcher.IoDispatcher import cromwell.core.WorkflowOptions -import cromwell.core.retry.Retry -import cromwell.filesystems.gcs.{ClientSecrets, GoogleAuthMode} +import cromwell.filesystems.gcs.auth.{ClientSecrets, GoogleAuthMode} import spray.json.JsObject -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.types.{WdlBooleanType, WdlFloatType, WdlIntegerType, WdlStringType} import wdl4s.values.WdlValue import scala.concurrent.Future 
+import scala.language.postfixOps import scala.util.Try object JesInitializationActor { @@ -29,14 +28,14 @@ object JesInitializationActor { JesRuntimeAttributes.PreemptibleKey, JesRuntimeAttributes.BootDiskSizeKey, JesRuntimeAttributes.DisksKey) def props(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], jesConfiguration: JesConfiguration, serviceRegistryActor: ActorRef): Props = Props(new JesInitializationActor(workflowDescriptor, calls, jesConfiguration, serviceRegistryActor: ActorRef)) } class JesInitializationActor(override val workflowDescriptor: BackendWorkflowDescriptor, - override val calls: Seq[Call], + override val calls: Set[TaskCall], private[jes] val jesConfiguration: JesConfiguration, override val serviceRegistryActor: ActorRef) extends BackendWorkflowInitializationActor { @@ -58,14 +57,11 @@ class JesInitializationActor(override val workflowDescriptor: BackendWorkflowDes private[jes] lazy val refreshTokenAuth: Option[JesAuthInformation] = { for { - clientSecrets <- List(jesConfiguration.jesAttributes.gcsFilesystemAuth) collectFirst { case s: ClientSecrets => s } + clientSecrets <- List(jesConfiguration.jesAttributes.auths.gcs) collectFirst { case s: ClientSecrets => s } token <- workflowDescriptor.workflowOptions.get(GoogleAuthMode.RefreshTokenOptionKey).toOption } yield GcsLocalizing(clientSecrets, token) } - private val iOExecutionContext = context.system.dispatchers.lookup(IoDispatcher) - - override protected def coerceDefaultRuntimeAttributes(options: WorkflowOptions): Try[Map[String, WdlValue]] = { RuntimeAttributesDefault.workflowOptionsDefault(options, JesRuntimeAttributes.coercionMap) } @@ -75,32 +71,25 @@ class JesInitializationActor(override val workflowDescriptor: BackendWorkflowDes */ override def beforeAll(): Future[Option[BackendInitializationData]] = { - val genomicsCredential = jesConfiguration.jesAttributes.genomicsCredential(workflowDescriptor.workflowOptions) - val gcsCredential = 
jesConfiguration.jesAttributes.gcsCredential(workflowDescriptor.workflowOptions) - - val jesCredentials = JesCredentials(genomicsCredential = genomicsCredential, gcsCredential = gcsCredential) def buildGenomics: Future[Genomics] = Future { - GenomicsFactory(jesConfiguration.googleConfig.applicationName, genomicsCredential, jesConfiguration.jesAttributes.endpointUrl) + jesConfiguration.genomicsFactory.withOptions(workflowDescriptor.workflowOptions) } for { - // generate single filesystem and genomics instances genomics <- buildGenomics - workflowPaths = new JesWorkflowPaths(workflowDescriptor, jesConfiguration, jesCredentials)(iOExecutionContext) + workflowPaths = new JesWorkflowPaths(workflowDescriptor, jesConfiguration)(context.system) _ <- if (jesConfiguration.needAuthFileUpload) writeAuthenticationFile(workflowPaths) else Future.successful(()) - _ = publishWorkflowRoot(workflowPaths.workflowRootPath.toString) + _ = publishWorkflowRoot(workflowPaths.workflowRoot.toString) } yield Option(JesBackendInitializationData(workflowPaths, genomics)) } private def writeAuthenticationFile(workflowPath: JesWorkflowPaths): Future[Unit] = { generateAuthJson(jesConfiguration.dockerCredentials, refreshTokenAuth) map { content => val path = workflowPath.gcsAuthFilePath - val upload = () => Future(path.writeAsJson(content)) - - workflowLogger.info(s"Creating authentication file for workflow ${workflowDescriptor.id} at \n ${path.toString}") - Retry.withRetry(upload, isFatal = isFatalJesException, isTransient = isTransientJesException)(context.system).void.recoverWith { + workflowLogger.info(s"Creating authentication file for workflow ${workflowDescriptor.id} at \n ${path.toUri}") + Future(path.writeAsJson(content)).void.recoverWith { case failure => Future.failed(new IOException("Failed to upload authentication file", failure)) - } + } void } getOrElse Future.successful(()) } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobCachingActorHelper.scala 
b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobCachingActorHelper.scala index 4ded3e9d1..fe39df840 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobCachingActorHelper.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobCachingActorHelper.scala @@ -4,12 +4,12 @@ import java.nio.file.Path import akka.actor.{Actor, ActorRef} import better.files._ +import cromwell.backend.BackendWorkflowDescriptor import cromwell.backend.callcaching.JobCachingActorHelper -import cromwell.backend.impl.jes.JesAsyncBackendJobExecutionActor.WorkflowOptionKeys import cromwell.backend.impl.jes.io.{JesAttachedDisk, JesWorkingDisk} import cromwell.core.logging.JobLogging -import scala.language.postfixOps +import scala.util.Try trait JesJobCachingActorHelper extends JobCachingActorHelper { this: Actor with JobLogging => @@ -25,23 +25,33 @@ trait JesJobCachingActorHelper extends JobCachingActorHelper { def initializationData: JesBackendInitializationData def serviceRegistryActor: ActorRef + + def workflowDescriptor: BackendWorkflowDescriptor - def getPath(str: String) = jesCallPaths.gcsFileSystem.getPath(str) + def getPath(str: String): Try[Path] = jesCallPaths.getPath(str) override lazy val configurationDescriptor = jesConfiguration.configurationDescriptor - lazy val jesCallPaths = initializationData.workflowPaths.toJesCallPaths(jobDescriptor.key) + lazy val jesCallPaths = { + val workflowPaths = if (workflowDescriptor.breadCrumbs.isEmpty) { + initializationData.workflowPaths + } else { + new JesWorkflowPaths(workflowDescriptor, jesConfiguration)(context.system) + } + + workflowPaths.toJobPaths(jobDescriptor.key) + } lazy val runtimeAttributes = JesRuntimeAttributes(jobDescriptor.runtimeAttributes, jobLogger) lazy val retryable = jobDescriptor.key.attempt <= runtimeAttributes.preemptible lazy val workingDisk: JesAttachedDisk = runtimeAttributes.disks.find(_.name == JesWorkingDisk.Name).get - lazy val 
callRootPath: Path = jesCallPaths.callRootPath + lazy val callRootPath: Path = jesCallPaths.callExecutionRoot lazy val returnCodeFilename = jesCallPaths.returnCodeFilename - lazy val returnCodeGcsPath = jesCallPaths.returnCodePath - lazy val jesStdoutFile = jesCallPaths.stdoutPath - lazy val jesStderrFile = jesCallPaths.stderrPath + lazy val returnCodeGcsPath = jesCallPaths.returnCode + lazy val jesStdoutFile = jesCallPaths.stdout + lazy val jesStderrFile = jesCallPaths.stderr lazy val jesLogFilename = jesCallPaths.jesLogFilename lazy val defaultMonitoringOutputPath = callRootPath.resolve(JesMonitoringLogFile) @@ -49,28 +59,22 @@ trait JesJobCachingActorHelper extends JobCachingActorHelper { lazy val preemptible: Boolean = jobDescriptor.key.attempt <= maxPreemption lazy val jesAttributes = jesConfiguration.jesAttributes - // TODO: Move monitoring paths to JesCallPaths lazy val monitoringScript: Option[JesInput] = { - jobDescriptor.workflowDescriptor.workflowOptions.get(WorkflowOptionKeys.MonitoringScript) map { path => - JesFileInput(s"$MonitoringParamName-in", getPath(path).toString, + jesCallPaths.monitoringPath map { path => + JesFileInput(s"$MonitoringParamName-in", path.toUri.toString, JesWorkingDisk.MountPoint.resolve(JesMonitoringScript), workingDisk) - } toOption + } } lazy val monitoringOutput = monitoringScript map { _ => JesFileOutput(s"$MonitoringParamName-out", defaultMonitoringOutputPath.toString, File(JesMonitoringLogFile).path, workingDisk) } + // Implements CacheHitDuplicating.metadataKeyValues lazy val metadataKeyValues: Map[String, Any] = { val runtimeAttributesMetadata: Map[String, Any] = runtimeAttributes.asMap map { case (key, value) => s"runtimeAttributes:$key" -> value } - - var fileMetadata: Map[String, Any] = jesCallPaths.metadataPaths - if (monitoringOutput.nonEmpty) { - // TODO: Move this to JesCallPaths - fileMetadata += JesMetadataKeys.MonitoringLog -> monitoringOutput.get.gcs - } - + val otherMetadata: Map[String, Any] = Map( 
JesMetadataKeys.GoogleProject -> jesAttributes.project, JesMetadataKeys.ExecutionBucket -> jesAttributes.executionBucket, @@ -79,6 +83,6 @@ trait JesJobCachingActorHelper extends JobCachingActorHelper { "cache:allowResultReuse" -> true ) - runtimeAttributesMetadata ++ fileMetadata ++ otherMetadata + runtimeAttributesMetadata ++ jesCallPaths.metadataPaths ++ otherMetadata } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobExecutionActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobExecutionActor.scala index b1a8ba8d6..476ee04aa 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobExecutionActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobExecutionActor.scala @@ -1,6 +1,7 @@ package cromwell.backend.impl.jes -import akka.actor.{ActorRef, Props} +import akka.actor.SupervisorStrategy.{Decider, Stop} +import akka.actor.{ActorRef, OneForOneStrategy, Props} import akka.event.LoggingReceive import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, BackendJobExecutionResponse} import cromwell.backend.BackendLifecycleActor.AbortJobCommand @@ -62,13 +63,15 @@ case class JesJobExecutionActor(override val jobDescriptor: BackendJobDescriptor private var executor: Option[ActorRef] = None + private[jes] def jabjeaProps = JesAsyncBackendJobExecutionActor.props(jobDescriptor, + completionPromise, + jesConfiguration, + initializationData, + serviceRegistryActor, + jesBackendSingletonActor) + private def launchExecutor: Future[Unit] = Future { - val executionProps = JesAsyncBackendJobExecutionActor.props(jobDescriptor, - completionPromise, - jesConfiguration, - initializationData, - serviceRegistryActor, - jesBackendSingletonActor) + val executionProps = jabjeaProps val executorRef = context.actorOf(executionProps, "JesAsyncBackendJobExecutionActor") executor = Option(executorRef) () @@ -95,4 +98,12 @@ case class JesJobExecutionActor(override 
val jobDescriptor: BackendJobDescriptor } override def abort(): Unit = {} + + // Supervision strategy: if the JABJEA throws an exception, stop the actor and fail the job. + def jobFailingDecider: Decider = { + case e: Exception => + completionPromise.tryFailure(new RuntimeException("JesAsyncBackendJobExecutionActor failed and didn't catch its exception.", e)) + Stop + } + override val supervisorStrategy = OneForOneStrategy()(jobFailingDecider) } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala new file mode 100644 index 000000000..fdbdb1dc7 --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesJobPaths.scala @@ -0,0 +1,60 @@ +package cromwell.backend.impl.jes + +import java.nio.file.Path + +import akka.actor.ActorSystem +import cromwell.backend.io.JobPaths +import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} +import cromwell.core.CallContext +import cromwell.services.metadata.CallMetadataKeys + +object JesJobPaths { + def apply(jobKey: BackendJobDescriptorKey, workflowDescriptor: BackendWorkflowDescriptor, + jesConfiguration: JesConfiguration)(implicit actorSystem: ActorSystem): JesJobPaths = { + new JesJobPaths(jobKey, workflowDescriptor, jesConfiguration) + } + + val JesLogPathKey = "jesLog" + val GcsExecPathKey = "gcsExec" +} + +class JesJobPaths(val jobKey: BackendJobDescriptorKey, workflowDescriptor: BackendWorkflowDescriptor, + jesConfiguration: JesConfiguration)(implicit actorSystem: ActorSystem) extends + JesWorkflowPaths(workflowDescriptor, jesConfiguration)(actorSystem) with JobPaths { + + val jesLogBasename = { + val index = jobKey.index.map(s => s"-$s").getOrElse("") + s"${jobKey.scope.unqualifiedName}$index" + } + + override val returnCodeFilename: String = s"$jesLogBasename-rc.txt" + override val stdoutFilename: String = s"$jesLogBasename-stdout.log" + override val 
stderrFilename: String = s"$jesLogBasename-stderr.log" + override val scriptFilename: String = "exec.sh" + + val jesLogFilename: String = s"$jesLogBasename.log" + lazy val jesLogPath: Path = callExecutionRoot.resolve(jesLogFilename) + + lazy val callContext = CallContext(callExecutionRoot, stdoutFilename, stderrFilename) + + /* + TODO: Move various monitoring files path generation here. + + "/cromwell_root" is a well known path, called in the regular JobPaths callDockerRoot. + This JesCallPaths should know about that root, and be able to create the monitoring file paths. + Instead of the AsyncActor creating the paths, the paths could then be shared with the CachingActor. + + Those monitoring paths could then be returned by metadataFiles and detritusFiles. + */ + + override lazy val customMetadataPaths = Map( + CallMetadataKeys.BackendLogsPrefix + ":log" -> jesLogPath + ) ++ ( + monitoringPath map { p => Map(JesMetadataKeys.MonitoringLog -> p) } getOrElse Map.empty + ) + + override lazy val customDetritusPaths: Map[String, Path] = Map( + JesJobPaths.GcsExecPathKey -> script, + JesJobPaths.JesLogPathKey -> jesLogPath + ) +} diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala index 9b39c869a..a7ac5e50a 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/JesWorkflowPaths.scala @@ -2,51 +2,61 @@ package cromwell.backend.impl.jes import java.nio.file.Path -import cromwell.backend.impl.jes.authentication.JesCredentials +import akka.actor.ActorSystem +import com.typesafe.config.Config +import cromwell.backend.impl.jes.JesAsyncBackendJobExecutionActor.WorkflowOptionKeys +import cromwell.backend.io.WorkflowPaths import cromwell.backend.{BackendJobDescriptorKey, BackendWorkflowDescriptor} -import 
cromwell.core.WorkflowOptions.FinalCallLogsDir -import cromwell.filesystems.gcs.{GcsFileSystem, GcsFileSystemProvider, GoogleAuthMode} +import cromwell.core.WorkflowOptions +import cromwell.core.path.PathBuilder +import cromwell.filesystems.gcs.{GcsPathBuilderFactory, RetryableGcsPathBuilder} -import scala.concurrent.ExecutionContext +import scala.language.postfixOps object JesWorkflowPaths { private val GcsRootOptionKey = "jes_gcs_root" private val AuthFilePathOptionKey = "auth_bucket" def apply(workflowDescriptor: BackendWorkflowDescriptor, - jesConfiguration: JesConfiguration, - credentials: JesCredentials)(implicit ec: ExecutionContext) = { - new JesWorkflowPaths(workflowDescriptor, jesConfiguration, credentials) + jesConfiguration: JesConfiguration)(implicit actorSystem: ActorSystem) = { + new JesWorkflowPaths(workflowDescriptor, jesConfiguration) } } -class JesWorkflowPaths(workflowDescriptor: BackendWorkflowDescriptor, - jesConfiguration: JesConfiguration, - credentials: JesCredentials)(implicit ec: ExecutionContext) { +class JesWorkflowPaths(val workflowDescriptor: BackendWorkflowDescriptor, + jesConfiguration: JesConfiguration)(implicit actorSystem: ActorSystem) extends WorkflowPaths { - private val gcsStorage = GoogleAuthMode.buildStorage(credentials.gcsCredential, jesConfiguration.googleConfig.applicationName) - val gcsFileSystemProvider: GcsFileSystemProvider = GcsFileSystemProvider(gcsStorage)(ec) - val gcsFileSystem = GcsFileSystem(gcsFileSystemProvider) + override lazy val executionRootString = workflowDescriptor.workflowOptions.getOrElse(JesWorkflowPaths.GcsRootOptionKey, jesConfiguration.root) + private val workflowOptions: WorkflowOptions = workflowDescriptor.workflowOptions + val gcsPathBuilder: RetryableGcsPathBuilder = jesConfiguration.gcsPathBuilderFactory.withOptions(workflowOptions) - val rootPath: Path = - gcsFileSystem.getPath(workflowDescriptor.workflowOptions.getOrElse(JesWorkflowPaths.GcsRootOptionKey, jesConfiguration.root)) - - val 
workflowRootPath: Path = rootPath.resolve(workflowDescriptor.workflowNamespace.workflow.unqualifiedName) - .resolve(workflowDescriptor.id.toString) - - val finalCallLogsPath = workflowDescriptor.getWorkflowOption(FinalCallLogsDir) map { gcsFileSystem.getPath(_) } + def getHash(gcsUrl: Path) = gcsPathBuilder.getHash(gcsUrl) val gcsAuthFilePath: Path = { /* * This is an "exception". The filesystem used here is built from genomicsAuth * unlike everywhere else where the filesystem used is built from gcsFileSystemAuth */ - val genomicsStorage = GoogleAuthMode.buildStorage(credentials.genomicsCredential, jesConfiguration.googleConfig.applicationName) - val fileSystemWithGenomicsAuth = GcsFileSystem(GcsFileSystemProvider(genomicsStorage)(ec)) - val bucket = workflowDescriptor.workflowOptions.get(JesWorkflowPaths.AuthFilePathOptionKey) getOrElse workflowRootPath.toString - - fileSystemWithGenomicsAuth.getPath(bucket).resolve(s"${workflowDescriptor.id}_auth.json") + val genomicsCredentials = jesConfiguration.jesAuths.genomics + + // The default auth file bucket is always at the root of the root workflow + val defaultBucket = executionRoot.resolve(workflowDescriptor.rootWorkflow.unqualifiedName).resolve(workflowDescriptor.rootWorkflowId.toString) + + val bucket = workflowDescriptor.workflowOptions.get(JesWorkflowPaths.AuthFilePathOptionKey) getOrElse defaultBucket.toUri.toString + val authBucket = GcsPathBuilderFactory(genomicsCredentials).withOptions(workflowOptions).build(bucket) recover { + case ex => throw new Exception(s"Invalid gcs auth_bucket path $bucket", ex) + } get + + authBucket.resolve(s"${workflowDescriptor.rootWorkflowId}_auth.json") + } + + + val monitoringPath = workflowOptions.get(WorkflowOptionKeys.MonitoringScript).toOption map { path => + // Fail here if the path exists but can't be built + getPath(path).get } - def toJesCallPaths(jobKey: BackendJobDescriptorKey) = JesCallPaths(jobKey, workflowDescriptor, jesConfiguration, credentials)(ec) + override def 
toJobPaths(jobKey: BackendJobDescriptorKey) = JesJobPaths(jobKey, workflowDescriptor, jesConfiguration) + override def config: Config = jesConfiguration.configurationDescriptor.backendConfig + override def pathBuilders: List[PathBuilder] = List(gcsPathBuilder) } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/Run.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/Run.scala index b5a8b5f9f..05078c7f2 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/Run.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/Run.scala @@ -21,7 +21,6 @@ object Run { "https://www.googleapis.com/auth/compute" ).asJava - private val JesServiceAccount = new ServiceAccount().setEmail("default").setScopes(GenomicsScopes) private val AcceptableEvents = Set("start", "pulling-image", "localizing-files", "running-docker", "delocalizing-files", "ok", "fail", "start-shutdown", "preempted") val NoAddressFieldName = "noAddress" @@ -36,6 +35,7 @@ object Run { logFileName: String, jesParameters: Seq[JesParameter], projectId: String, + computeServiceAccount: String, preemptible: Boolean, genomicsInterface: Genomics): Run = { val logger = new JobLogger("JesRun", jobDescriptor.workflowDescriptor.id, jobDescriptor.key.tag, None, Set(slf4jLogger)) @@ -48,7 +48,7 @@ object Run { .setProjectId(projectId) .setDocker(pipelineInfo.docker) .setResources(pipelineInfo.resources) - .setName(workflow.workflowNamespace.workflow.unqualifiedName) + .setName(workflow.workflow.unqualifiedName) .setInputParameters(jesParameters.collect({ case i: JesInput => i.toGooglePipelineParameter }).toVector.asJava) .setOutputParameters(jesParameters.collect({ case i: JesFileOutput => i.toGooglePipelineParameter }).toVector.asJava) @@ -60,7 +60,8 @@ object Run { } def runPipeline: String = { - val rpargs = new RunPipelineArgs().setProjectId(projectId).setServiceAccount(JesServiceAccount).setResources(runtimePipelineResources) + val 
svcAccount = new ServiceAccount().setEmail(computeServiceAccount).setScopes(GenomicsScopes) + val rpargs = new RunPipelineArgs().setProjectId(projectId).setServiceAccount(svcAccount).setResources(runtimePipelineResources) rpargs.setInputs(jesParameters.collect({ case i: JesInput => i.name -> i.toGoogleRunParameter }).toMap.asJava) logger.debug(s"Inputs:\n${stringifyMap(rpargs.getInputs.asScala.toMap)}") diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesAuths.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesAuths.scala new file mode 100644 index 000000000..bb6b048c4 --- /dev/null +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesAuths.scala @@ -0,0 +1,5 @@ +package cromwell.backend.impl.jes.authentication + +import cromwell.filesystems.gcs.auth.GoogleAuthMode + +case class JesAuths(genomics: GoogleAuthMode, gcs: GoogleAuthMode) diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesCredentials.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesCredentials.scala deleted file mode 100644 index b4316fde3..000000000 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesCredentials.scala +++ /dev/null @@ -1,5 +0,0 @@ -package cromwell.backend.impl.jes.authentication - -import com.google.api.client.auth.oauth2.Credential - -case class JesCredentials(genomicsCredential: Credential, gcsCredential: Credential) diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesVMAuthentication.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesVMAuthentication.scala index 67ddd1df3..9c92b380a 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesVMAuthentication.scala +++ 
b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/authentication/JesVMAuthentication.scala @@ -1,7 +1,7 @@ package cromwell.backend.impl.jes.authentication import cromwell.core.DockerCredentials -import cromwell.filesystems.gcs.ClientSecrets +import cromwell.filesystems.gcs.auth.ClientSecrets import spray.json.{JsString, JsValue} /** diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala index 5c7cf4a8a..42cf8f7be 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/callcaching/JesBackendFileHashing.scala @@ -9,8 +9,8 @@ import scala.util.{Failure, Try} private[jes] object JesBackendFileHashing { def getCrc32c(singleFileHashRequest: SingleFileHashRequest, log: LoggingAdapter): Try[String] = { def usingJesInitData(jesInitData: JesBackendInitializationData) = for { - path <- Try(jesInitData.workflowPaths.gcsFileSystem.getPath(singleFileHashRequest.file.valueString)) - crc32c <- Try(jesInitData.workflowPaths.gcsFileSystemProvider.crc32cHash(path)) + path <- jesInitData.workflowPaths.getPath(singleFileHashRequest.file.valueString) + crc32c <- jesInitData.workflowPaths.getHash(path) } yield crc32c singleFileHashRequest.initializationData match { diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/package.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/package.scala index 24d417c99..2a2ae8ac2 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/package.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/io/package.scala @@ -3,19 +3,14 @@ package cromwell.backend.impl.jes import java.nio.file.{Files, Path} import com.google.api.client.http.HttpResponseException 
-import cromwell.filesystems.gcs._ +import com.google.cloud.storage.contrib.nio.CloudStorageOptions package object io { implicit class PathEnhanced(val path: Path) extends AnyVal { import better.files._ - def hash = path match { - case gcs: NioGcsPath => gcs.getFileSystem.provider().asInstanceOf[GcsFileSystemProvider].crc32cHash(gcs) - case _ => File(path).md5 - } - def writeAsJson(content: String): File = { - Files.write(path, content.getBytes, ContentTypeOption.Json) + Files.write(path, content.getBytes, CloudStorageOptions.withMimeType("application/json")) } } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManager.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManager.scala index f3a7739ac..12fe055dc 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManager.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManager.scala @@ -11,15 +11,19 @@ import scala.collection.immutable.Queue * Currently, just holds a set of JES status poll requests until a PollingActor pulls the work. * TODO: Could eventually move all of the JES queries into a single work-pulling model. */ -class JesApiQueryManager extends Actor with ActorLogging { +class JesApiQueryManager(val qps: Int) extends Actor with ActorLogging { - private var workQueue: Queue[JesStatusPollQuery] = Queue.empty + // workQueue is protected for the unit tests, not intended to be generally overridden + protected[statuspolling] var workQueue: Queue[JesStatusPollQuery] = Queue.empty private var workInProgress: Map[ActorRef, JesPollingWorkBatch] = Map.empty // If the statusPoller dies, we want to stop it and handle the termination ourselves. 
override val supervisorStrategy = SupervisorStrategy.stoppingStrategy - private def statusPollerProps = JesPollingActor.props(self) - private var statusPoller: ActorRef = _ + private def statusPollerProps = JesPollingActor.props(self, qps) + + // statusPoller is protected for the unit tests, not intended to be generally overridden + protected[statuspolling] var statusPoller: ActorRef = _ + resetStatusPoller() override def receive = { @@ -70,15 +74,17 @@ class JesApiQueryManager extends Actor with ActorLogging { // Currently we can assume this is a polling actor. Might change in a future update: workInProgress.get(terminee) match { case Some(work) => - // Ouch. We should tell all of its clients that it fell over. And then start a new one. - log.error(s"The JES polling actor $terminee unexpectedly terminated while conducting ${work.workBatch.tail.size + 1} polls. Making a new one...") - work.workBatch.toList foreach { _.requester ! JesPollingActor.JesPollError } + // Most likely due to an unexpected HTTP error, push the work back on the queue and keep going + log.info(s"The JES polling actor $terminee unexpectedly terminated while conducting ${work.workBatch.tail.size + 1} polls. Making a new one...") + workInProgress -= terminee + workQueue = workQueue ++ work.workBatch.toList case None => // It managed to die while doing absolutely nothing...!? // Maybe it deserves an entry in https://en.wikipedia.org/wiki/List_of_unusual_deaths // Oh well, in the mean time don't do anything, just start a new one log.error(s"The JES polling actor $terminee managed to unexpectedly terminate whilst doing absolutely nothing. This is probably a programming error. Making a new one...") } + resetStatusPoller() } @@ -93,7 +99,7 @@ class JesApiQueryManager extends Actor with ActorLogging { object JesApiQueryManager { - def props: Props = Props(new JesApiQueryManager) + def props(qps: Int): Props = Props(new JesApiQueryManager(qps)) /** * Poll the job represented by the Run. 
diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActor.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActor.scala index 4152d3933..31a9d114a 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActor.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActor.scala @@ -7,7 +7,7 @@ import com.google.api.client.googleapis.batch.json.JsonBatchCallback import com.google.api.client.googleapis.json.GoogleJsonError import com.google.api.client.http.HttpHeaders import com.google.api.services.genomics.model.Operation -import cromwell.backend.impl.jes.Run +import cromwell.backend.impl.jes.{JesAttributes, Run} import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.{JesPollingWorkBatch, JesStatusPollQuery, NoWorkToDo} import cromwell.backend.impl.jes.statuspolling.JesPollingActor._ @@ -19,12 +19,11 @@ import scala.concurrent.duration._ /** * Polls JES for status. Pipes the results back (so expect either a RunStatus or a akka.actor.Status.Failure). */ -class JesPollingActor(pollingManager: ActorRef) extends Actor with ActorLogging { +class JesPollingActor(val pollingManager: ActorRef, val qps: Int) extends Actor with ActorLogging { + // The interval to delay between submitting each batch + lazy val batchInterval = determineBatchInterval(determineEffectiveQps(qps)) + log.debug("JES batch polling interval is {}", batchInterval) - // We want to query at just under our fixed JES QPS limit of 20 per second. That should hopefully allow some room at the edges - // for things like new calls, etc. - val MaxBatchSize = 100 - val BatchInterval = 5.5.seconds self ! 
NoWorkToDo // Starts the check-for-work cycle implicit val ec: ExecutionContext = context.dispatcher @@ -109,14 +108,43 @@ class JesPollingActor(pollingManager: ActorRef) extends Actor with ActorLogging * Warning: Only use this from inside a receive method. */ private def scheduleCheckForWork(): Unit = { - context.system.scheduler.scheduleOnce(BatchInterval) { pollingManager ! JesApiQueryManager.RequestJesPollingWork(MaxBatchSize) } + context.system.scheduler.scheduleOnce(batchInterval) { pollingManager ! JesApiQueryManager.RequestJesPollingWork(MaxBatchSize) } () } + + /** + * We don't want to allow non-positive QPS values. Catch these instances and replace them with a sensible default. + * Here we're using the default value coming from JES itself + */ + private def determineEffectiveQps(qps: Int): Int = { + if (qps > 0) qps + else { + val defaultQps = JesAttributes.GenomicsApiDefaultQps + log.warning("Supplied QPS for Google Genomics API was not positive, value was {} using {} instead", qps, defaultQps) + defaultQps + } + } } object JesPollingActor { - def props(pollingManager: ActorRef) = Props(new JesPollingActor(pollingManager)) + def props(pollingManager: ActorRef, qps: Int) = Props(new JesPollingActor(pollingManager, qps)) + + // The Batch API limits us to 100 at a time + val MaxBatchSize = 100 + + /** + * Given the Genomics API queries per 100 seconds and given MaxBatchSize will determine a batch interval which + * is at 90% of the quota. The (still crude) delta is to provide some room at the edges for things like new + * calls, etc. 
+ * + * Forcing the minimum value to be 1 second, for now it seems unlikely to matter and it makes testing a bit + * easier + */ + def determineBatchInterval(qps: Int): FiniteDuration = { + val maxInterval = MaxBatchSize / qps.toDouble // Force this to be floating point in case the value is < 1 + val interval = Math.max(maxInterval * 0.9, 1) + interval.seconds + } final case class JesPollFailed(e: GoogleJsonError, responseHeaders: HttpHeaders) - case object JesPollError } diff --git a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorClient.scala b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorClient.scala index 1bf7328f8..9070378c3 100644 --- a/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorClient.scala +++ b/supportedBackends/jes/src/main/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorClient.scala @@ -3,7 +3,7 @@ package cromwell.backend.impl.jes.statuspolling import java.io.IOException import akka.actor.{Actor, ActorLogging, ActorRef} -import cromwell.backend.impl.jes.statuspolling.JesPollingActor.{JesPollError, JesPollFailed} +import cromwell.backend.impl.jes.statuspolling.JesPollingActor.JesPollFailed import cromwell.backend.impl.jes.{Run, RunStatus} import scala.concurrent.{Future, Promise} @@ -28,9 +28,6 @@ trait JesPollingActorClient { this: Actor with ActorLogging => case JesPollFailed(e, responseHeaders) => log.debug("JES poll failed! Sad.") completePromise(Failure(new IOException(s"Google request failed: ${e.toPrettyString}"))) - case JesPollError => - log.debug("JES poll failed when polling actor died unexpectedly! 
Sad.") - completePromise(Failure(new RuntimeException("Unexpected actor death!"))) } private def completePromise(runStatus: Try[RunStatus]) = { diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala index 5601e1ebb..40078cac6 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAsyncBackendJobExecutionActorSpec.scala @@ -4,19 +4,20 @@ import java.nio.file.Paths import java.util.UUID import akka.actor.{ActorRef, Props} -import akka.event.LoggingAdapter import akka.testkit.{ImplicitSender, TestActorRef, TestDuration, TestProbe} +import com.google.cloud.storage.contrib.nio.CloudStoragePath import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionResponse +import cromwell.backend._ import cromwell.backend.async.AsyncBackendJobExecutionActor.{Execute, ExecutionMode} import cromwell.backend.async.{AbortedExecutionHandle, ExecutionHandle, FailedNonRetryableExecutionHandle, FailedRetryableExecutionHandle} import cromwell.backend.impl.jes.JesAsyncBackendJobExecutionActor.JesPendingExecutionHandle -import cromwell.backend.impl.jes.MockObjects._ import cromwell.backend.impl.jes.RunStatus.Failed import cromwell.backend.impl.jes.io.{DiskType, JesWorkingDisk} -import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobDescriptorKey, BackendWorkflowDescriptor, PreemptedException, RuntimeAttributeDefinition} -import cromwell.core.logging.LoggerWrapper +import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.DoPoll +import cromwell.core.logging.JobLogger import cromwell.core.{WorkflowId, WorkflowOptions, _} -import cromwell.filesystems.gcs._ +import cromwell.filesystems.gcs.GcsPathBuilderFactory +import 
cromwell.filesystems.gcs.auth.GoogleAuthMode.NoAuthMode import cromwell.util.SampleWdl import org.scalatest._ import org.scalatest.prop.Tables.Table @@ -25,16 +26,16 @@ import org.specs2.mock.Mockito import spray.json.{JsObject, JsValue} import wdl4s.types.{WdlArrayType, WdlFileType, WdlMapType, WdlStringType} import wdl4s.values.{WdlArray, WdlFile, WdlMap, WdlString, WdlValue} -import wdl4s.{Call, LocallyQualifiedName, NamespaceWithWorkflow} +import wdl4s.{LocallyQualifiedName, FullyQualifiedName => _, _} import scala.concurrent.duration._ import scala.concurrent.{Await, ExecutionContext, Future, Promise} import scala.util.{Success, Try} -import cromwell.backend.impl.jes.MockObjects._ -import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.DoPoll class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackendJobExecutionActorSpec") - with FlatSpecLike with Matchers with ImplicitSender with Mockito { + with FlatSpecLike with Matchers with ImplicitSender with Mockito with BackendSpec { + + val mockPathBuilder = GcsPathBuilderFactory(NoAuthMode).withOptions(mock[WorkflowOptions]) import JesTestConfig._ @@ -56,7 +57,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend | } |} | - |workflow sup { + |workflow wf_sup { | call sup |} """.stripMargin @@ -65,16 +66,14 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val NoOptions = WorkflowOptions(JsObject(Map.empty[String, JsValue])) - val TestableCallContext = CallContext(MockGcsFileSystemBuilder.mockGcsFileSystem.getPath("gs://root"), "out", "err") + val TestableCallContext = CallContext(mockPathBuilder.build("gs://root").get, "out", "err") val TestableJesExpressionFunctions = { - new JesExpressionFunctions(List(MockGcsFileSystemBuilder.mockGcsFileSystem), TestableCallContext) + new JesExpressionFunctions(List(mockPathBuilder), TestableCallContext) } private def buildInitializationData(jobDescriptor: BackendJobDescriptor, 
configuration: JesConfiguration) = { - val workflowPaths = JesWorkflowPaths(jobDescriptor.workflowDescriptor, - configuration, - mockCredentials)(scala.concurrent.ExecutionContext.global) + val workflowPaths = JesWorkflowPaths(jobDescriptor.workflowDescriptor, configuration)(system) JesBackendInitializationData(workflowPaths, null) } @@ -85,12 +84,9 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend jesSingletonActor: ActorRef = emptyActor) extends JesAsyncBackendJobExecutionActor(jobDescriptor, promise, jesConfiguration, buildInitializationData(jobDescriptor, jesConfiguration), emptyActor, jesSingletonActor) { - override lazy val jobLogger = new LoggerWrapper { - override def akkaLogger: Option[LoggingAdapter] = Option(log) - + override lazy val jobLogger = new JobLogger("TestLogger", workflowId, jobTag, akkaLogger = Option(log)) { override def tag: String = s"$name [UUID(${workflowId.shortString})$jobTag]" - - override def slf4jLoggers: Set[Logger] = Set.empty + override val slf4jLoggers: Set[Logger] = Set.empty } override lazy val callEngineFunctions = functions @@ -110,15 +106,15 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend private def buildPreemptibleJobDescriptor(attempt: Int, preemptible: Int): BackendJobDescriptor = { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(YoSup.replace("[PREEMPTIBLE]", s"preemptible: $preemptible")), + WdlNamespaceWithWorkflow.load(YoSup.replace("[PREEMPTIBLE]", s"preemptible: $preemptible"), Seq.empty[ImportResolver]).workflow, Inputs, NoOptions ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val key = BackendJobDescriptorKey(job, None, attempt) val runtimeAttributes = makeRuntimeAttributes(job) - BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Inputs) + BackendJobDescriptor(workflowDescriptor, key, 
runtimeAttributes, fqnMapToDeclarationMap(Inputs)) } private def executionActor(jobDescriptor: BackendJobDescriptor, @@ -128,7 +124,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend // Mock/stub out the bits that would reach out to JES. val run = mock[Run] - val handle = JesPendingExecutionHandle(jobDescriptor, Seq.empty, run, None) + val handle = JesPendingExecutionHandle(jobDescriptor, Set.empty, run, None) class ExecuteOrRecoverActor extends TestableJesJobExecutionActor(jobDescriptor, promise, jesConfiguration, jesSingletonActor = jesSingletonActor) { override def executeOrRecover(mode: ExecutionMode)(implicit ec: ExecutionContext): Future[ExecutionHandle] = Future.successful(handle) @@ -187,8 +183,8 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend expectations foreach { case (attempt, preemptible, errorCode, innerErrorCode, shouldRetry) => it should s"handle call failures appropriately with respect to preemption (attempt=$attempt, preemptible=$preemptible, errorCode=$errorCode, innerErrorCode=$innerErrorCode)" in { runAndFail(attempt, preemptible, errorCode, innerErrorCode).getClass.getSimpleName match { - case "FailedNonRetryableResponse" => false shouldBe shouldRetry - case "FailedRetryableResponse" => true shouldBe shouldRetry + case "JobFailedNonRetryableResponse" => false shouldBe shouldRetry + case "JobFailedRetryableResponse" => true shouldBe shouldRetry case huh => fail(s"Unexpected response class name: '$huh'") } } @@ -284,21 +280,21 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(YoSup.replace("[PREEMPTIBLE]", "")), + WdlNamespaceWithWorkflow.load(YoSup.replace("[PREEMPTIBLE]", ""), Seq.empty[ImportResolver]).workflow, inputs, NoOptions ) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = 
workflowDescriptor.workflow.taskCalls.head val key = BackendJobDescriptorKey(call, None, 1) val runtimeAttributes = makeRuntimeAttributes(call) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs)) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( props, s"TestableJesJobExecutionActor-${jobDescriptor.workflowDescriptor.id}") - val mappedInputs = jobDescriptor.inputs mapValues testActorRef.underlyingActor.gcsPathToLocal + val mappedInputs = jobDescriptor.fullyQualifiedInputs mapValues testActorRef.underlyingActor.gcsPathToLocal mappedInputs(stringKey) match { case WdlString(v) => assert(v.equalsIgnoreCase(stringVal.value)) @@ -338,15 +334,15 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, inputs, NoOptions ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val runtimeAttributes = makeRuntimeAttributes(job) val key = BackendJobDescriptorKey(job, None, 1) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs)) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( @@ -377,15 +373,15 @@ class 
JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend TestActorRef[TestableJesJobExecutionActor] = { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(sampleWdl.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(sampleWdl.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, inputs, NoOptions ) - val call = workflowDescriptor.workflowNamespace.workflow.findCallByName(callName).get + val call = workflowDescriptor.workflow.findCallByName(callName).get.asInstanceOf[TaskCall] val key = BackendJobDescriptorKey(call, None, 1) val runtimeAttributes = makeRuntimeAttributes(call) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs)) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration, functions)) TestActorRef[TestableJesJobExecutionActor](props, s"TestableJesJobExecutionActor-${jobDescriptor.workflowDescriptor.id}") @@ -393,14 +389,14 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend it should "generate correct JesOutputs" in { val inputs = Map( - "in" -> WdlFile("gs://a/b/c.txt") + "in" -> WdlFile("gs://blah/b/c.txt") ) val jesBackend = makeJesActorRef(SampleWdl.FilePassingWorkflow, "a", inputs).underlyingActor val jobDescriptor = jesBackend.jobDescriptor val workflowId = jesBackend.workflowId val jesInputs = jesBackend.generateJesInputs(jobDescriptor) jesInputs should have size 1 - jesInputs should contain(JesFileInput("in-0", "gs://a/b/c.txt", Paths.get("a/b/c.txt"), workingDisk)) + jesInputs should contain(JesFileInput("in-0", "gs://blah/b/c.txt", Paths.get("blah/b/c.txt"), workingDisk)) val jesOutputs = jesBackend.generateJesOutputs(jobDescriptor) jesOutputs should have size 1 jesOutputs should 
contain(JesFileOutput("out", @@ -413,7 +409,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend ) class TestJesExpressionFunctions extends JesExpressionFunctions( - List(MockGcsFileSystemBuilder.mockGcsFileSystem), TestableCallContext) { + List(mockPathBuilder), TestableCallContext) { override def write_lines(params: Seq[Try[WdlValue]]): Try[WdlFile] = { Success(WdlFile(s"gs://some/path/file.txt")) } @@ -438,15 +434,15 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, inputs, NoOptions ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val runtimeAttributes = makeRuntimeAttributes(job) val key = BackendJobDescriptorKey(job, None, 1) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs)) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( @@ -466,15 +462,15 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.CurrentDirectory.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, inputs, NoOptions ) - val job = 
workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val runtimeAttributes = makeRuntimeAttributes(job) val key = BackendJobDescriptorKey(job, None, 1) - val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, inputs) + val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, fqnMapToDeclarationMap(inputs)) val props = Props(new TestableJesJobExecutionActor(jobDescriptor, Promise(), jesConfiguration)) val testActorRef = TestActorRef[TestableJesJobExecutionActor]( @@ -487,7 +483,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend } it should "convert local Paths back to corresponding GCS paths in JesOutputs" in { - val jesOutputs = Seq( + val jesOutputs = Set( JesFileOutput("/cromwell_root/path/to/file1", "gs://path/to/file1", Paths.get("/cromwell_root/path/to/file1"), workingDisk), JesFileOutput("/cromwell_root/path/to/file2", "gs://path/to/file2", @@ -510,12 +506,12 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, Map.empty, NoOptions ) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head val key = BackendJobDescriptorKey(call, None, 1) val runtimeAttributes = makeRuntimeAttributes(call) val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) @@ -538,12 +534,12 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend it should "create a JesFileInput for the monitoring script, when specified" in { val workflowDescriptor = 
BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, Map.empty, WorkflowOptions.fromJsonString("""{"monitoring_script": "gs://path/to/script"}""").get ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val runtimeAttributes = makeRuntimeAttributes(job) val key = BackendJobDescriptorKey(job, None, 1) val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) @@ -559,12 +555,12 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend it should "not create a JesFileInput for the monitoring script, when not specified" in { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource), + WdlNamespaceWithWorkflow.load(SampleWdl.EmptyString.asWorkflowSources(DockerAndDiskRuntime).wdlSource, Seq.empty[ImportResolver]).workflow, Map.empty, NoOptions ) - val job = workflowDescriptor.workflowNamespace.workflow.calls.head + val job = workflowDescriptor.workflow.taskCalls.head val key = BackendJobDescriptorKey(job, None, 1) val runtimeAttributes = makeRuntimeAttributes(job) val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) @@ -579,13 +575,13 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend it should "return JES log paths for non-scattered call" in { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId(UUID.fromString("e6236763-c518-41d0-9688-432549a8bf7c")), - NamespaceWithWorkflow.load( - SampleWdl.HelloWorld.asWorkflowSources(""" runtime {docker: "ubuntu:latest"} """).wdlSource), + 
WdlNamespaceWithWorkflow.load( + SampleWdl.HelloWorld.asWorkflowSources(""" runtime {docker: "ubuntu:latest"} """).wdlSource, Seq.empty[ImportResolver]).workflow, Map.empty, WorkflowOptions.fromJsonString(""" {"jes_gcs_root": "gs://path/to/gcs_root"} """).get ) - val call = workflowDescriptor.workflowNamespace.workflow.findCallByName("hello").get + val call = workflowDescriptor.workflow.findCallByName("hello").get.asInstanceOf[TaskCall] val key = BackendJobDescriptorKey(call, None, 1) val runtimeAttributes = makeRuntimeAttributes(call) val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) @@ -596,28 +592,27 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val jesBackend = testActorRef.underlyingActor - // TODO: NioGcsPath.equals not implemented, so use toString instead - jesBackend.jesCallPaths.stdoutPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.stdoutPath.toString shouldBe - "gs://path/to/gcs_root/hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello-stdout.log" - jesBackend.jesCallPaths.stderrPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.stderrPath.toString shouldBe - "gs://path/to/gcs_root/hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello-stderr.log" - jesBackend.jesCallPaths.jesLogPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.jesLogPath.toString shouldBe - "gs://path/to/gcs_root/hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello.log" + jesBackend.jesCallPaths.stdout should be(a[CloudStoragePath]) + jesBackend.jesCallPaths.stdout.toUri.toString shouldBe + "gs://path/to/gcs_root/wf_hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello-stdout.log" + jesBackend.jesCallPaths.stderr should be(a[CloudStoragePath]) + jesBackend.jesCallPaths.stderr.toUri.toString shouldBe + "gs://path/to/gcs_root/wf_hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello-stderr.log" + jesBackend.jesCallPaths.jesLogPath should 
be(a[CloudStoragePath]) + jesBackend.jesCallPaths.jesLogPath.toUri.toString shouldBe + "gs://path/to/gcs_root/wf_hello/e6236763-c518-41d0-9688-432549a8bf7c/call-hello/hello.log" } it should "return JES log paths for scattered call" in { val workflowDescriptor = BackendWorkflowDescriptor( WorkflowId(UUID.fromString("e6236763-c518-41d0-9688-432549a8bf7d")), - NamespaceWithWorkflow.load( - new SampleWdl.ScatterWdl().asWorkflowSources(""" runtime {docker: "ubuntu:latest"} """).wdlSource), + WdlNamespaceWithWorkflow.load( + new SampleWdl.ScatterWdl().asWorkflowSources(""" runtime {docker: "ubuntu:latest"} """).wdlSource, Seq.empty[ImportResolver]).workflow, Map.empty, WorkflowOptions.fromJsonString(""" {"jes_gcs_root": "gs://path/to/gcs_root"} """).get ) - val call = workflowDescriptor.workflowNamespace.workflow.findCallByName("B").get + val call = workflowDescriptor.workflow.findCallByName("B").get.asInstanceOf[TaskCall] val key = BackendJobDescriptorKey(call, Option(2), 1) val runtimeAttributes = makeRuntimeAttributes(call) val jobDescriptor = BackendJobDescriptor(workflowDescriptor, key, runtimeAttributes, Map.empty) @@ -628,14 +623,14 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend val jesBackend = testActorRef.underlyingActor - jesBackend.jesCallPaths.stdoutPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.stdoutPath.toString shouldBe + jesBackend.jesCallPaths.stdout should be(a[CloudStoragePath]) + jesBackend.jesCallPaths.stdout.toUri.toString shouldBe "gs://path/to/gcs_root/w/e6236763-c518-41d0-9688-432549a8bf7d/call-B/shard-2/B-2-stdout.log" - jesBackend.jesCallPaths.stderrPath should be(a[NioGcsPath]) - jesBackend.jesCallPaths.stderrPath.toString shouldBe + jesBackend.jesCallPaths.stderr should be(a[CloudStoragePath]) + jesBackend.jesCallPaths.stderr.toUri.toString shouldBe "gs://path/to/gcs_root/w/e6236763-c518-41d0-9688-432549a8bf7d/call-B/shard-2/B-2-stderr.log" - jesBackend.jesCallPaths.jesLogPath should 
be(a[NioGcsPath]) - jesBackend.jesCallPaths.jesLogPath.toString shouldBe + jesBackend.jesCallPaths.jesLogPath should be(a[CloudStoragePath]) + jesBackend.jesCallPaths.jesLogPath.toUri.toString shouldBe "gs://path/to/gcs_root/w/e6236763-c518-41d0-9688-432549a8bf7d/call-B/shard-2/B-2.log" } @@ -662,7 +657,7 @@ class JesAsyncBackendJobExecutionActorSpec extends TestKitSuite("JesAsyncBackend descriptorWithMax2AndKey2.preemptible shouldBe true } - private def makeRuntimeAttributes(job: Call) = { + private def makeRuntimeAttributes(job: TaskCall) = { val evaluatedAttributes = RuntimeAttributeDefinition.evaluateRuntimeAttributes(job.task.runtimeAttributes, TestableJesExpressionFunctions, Map.empty) RuntimeAttributeDefinition.addDefaultsToAttributes(JesBackendLifecycleActorFactory.staticRuntimeAttributeDefinitions, NoOptions)(evaluatedAttributes.get) // Fine to throw the exception if this "get" fails. This is a test after all! } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttributesSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttributesSpec.scala index 17c33d0de..0eec23cbe 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttributesSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesAttributesSpec.scala @@ -16,18 +16,19 @@ class JesAttributesSpec extends FlatSpec with Matchers { it should "parse correct JES config" taggedAs IntegrationTest in { val googleConfig = GoogleConfiguration(JesGlobalConfig) - val backendConfig = ConfigFactory.parseString(configString.replace("[PREEMPTIBLE]", "")) + val backendConfig = ConfigFactory.parseString(configString()) val jesAttributes = JesAttributes(googleConfig, backendConfig) jesAttributes.endpointUrl should be(new URL("http://myEndpoint")) jesAttributes.project should be("myProject") jesAttributes.executionBucket should be("gs://myBucket") jesAttributes.maxPollingInterval should be(600) + 
jesAttributes.computeServiceAccount should be("default") } it should "parse correct preemptible config" taggedAs IntegrationTest in { val googleConfig = GoogleConfiguration(JesGlobalConfig) - val backendConfig = ConfigFactory.parseString(configString.replace("[PREEMPTIBLE]", "preemptible = 3")) + val backendConfig = ConfigFactory.parseString(configString(preemptible = "preemptible = 3")) val jesAttributes = JesAttributes(googleConfig, backendConfig) jesAttributes.endpointUrl should be(new URL("http://myEndpoint")) @@ -36,6 +37,14 @@ class JesAttributesSpec extends FlatSpec with Matchers { jesAttributes.maxPollingInterval should be(600) } + it should "parse compute service account" taggedAs IntegrationTest in { + val googleConfig = GoogleConfiguration(JesGlobalConfig) + val backendConfig = ConfigFactory.parseString(configString(genomics = """compute-service-account = "testing" """)) + + val jesAttributes = JesAttributes(googleConfig, backendConfig) + jesAttributes.computeServiceAccount should be("testing") + } + it should "not parse invalid config" taggedAs IntegrationTest in { val nakedConfig = ConfigFactory.parseString( @@ -60,17 +69,18 @@ class JesAttributesSpec extends FlatSpec with Matchers { errorsList should contain("no protocol: myEndpoint") } - val configString = - """ + def configString(preemptible: String = "", genomics: String = "") = + s""" |{ | project = "myProject" | root = "gs://myBucket" | maximum-polling-interval = 600 - | [PREEMPTIBLE] + | $preemptible | genomics { | // A reference to an auth defined in the `google` stanza at the top. This auth is used to create | // Pipelines and manipulate auth JSONs. 
| auth = "application-default" + | $genomics | endpoint-url = "http://myEndpoint" | } | diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesCallPathsSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesCallPathsSpec.scala index d27328706..58841f756 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesCallPathsSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesCallPathsSpec.scala @@ -1,13 +1,12 @@ package cromwell.backend.impl.jes import cromwell.backend.BackendSpec +import cromwell.core.TestKitSuite import cromwell.util.SampleWdl -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.{FlatSpecLike, Matchers} import org.specs2.mock.Mockito -import scala.concurrent.ExecutionContext.Implicits.global -import cromwell.backend.impl.jes.MockObjects._ -class JesCallPathsSpec extends FlatSpec with Matchers with Mockito { +class JesCallPathsSpec extends TestKitSuite with FlatSpecLike with Matchers with Mockito { import BackendSpec._ import JesTestConfig._ @@ -19,8 +18,8 @@ class JesCallPathsSpec extends FlatSpec with Matchers with Mockito { val jobDescriptorKey = firstJobDescriptorKey(workflowDescriptor) val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) - val callPaths = JesCallPaths(jobDescriptorKey, workflowDescriptor, - jesConfiguration, mockCredentials) + val callPaths = JesJobPaths(jobDescriptorKey, workflowDescriptor, + jesConfiguration) callPaths.returnCodeFilename should be("hello-rc.txt") callPaths.stderrFilename should be("hello-stderr.log") callPaths.stdoutFilename should be("hello-stdout.log") @@ -32,16 +31,15 @@ class JesCallPathsSpec extends FlatSpec with Matchers with Mockito { val jobDescriptorKey = firstJobDescriptorKey(workflowDescriptor) val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) - val callPaths = JesCallPaths(jobDescriptorKey, workflowDescriptor, jesConfiguration, - 
mockCredentials) - callPaths.returnCodePath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello/hello-rc.txt") - callPaths.stdoutPath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello/hello-stdout.log") - callPaths.stderrPath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello/hello-stderr.log") - callPaths.jesLogPath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello/hello.log") + val callPaths = JesJobPaths(jobDescriptorKey, workflowDescriptor, jesConfiguration) + callPaths.returnCode.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello-rc.txt") + callPaths.stdout.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello-stdout.log") + callPaths.stderr.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello-stderr.log") + callPaths.jesLogPath.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello/hello.log") } it should "map the correct call context" in { @@ -49,10 +47,9 @@ class JesCallPathsSpec extends FlatSpec with Matchers with Mockito { val jobDescriptorKey = firstJobDescriptorKey(workflowDescriptor) val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) - val callPaths = JesCallPaths(jobDescriptorKey, workflowDescriptor, jesConfiguration, - mockCredentials) - callPaths.callContext.root.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/call-hello") + val callPaths = JesJobPaths(jobDescriptorKey, workflowDescriptor, jesConfiguration) + callPaths.callContext.root.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/call-hello") 
callPaths.callContext.stdout should be("hello-stdout.log") callPaths.callContext.stderr should be("hello-stderr.log") } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesConfigurationSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesConfigurationSpec.scala index c2b77d38c..46068343c 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesConfigurationSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesConfigurationSpec.scala @@ -1,16 +1,25 @@ package cromwell.backend.impl.jes +import better.files.File import com.typesafe.config.{ConfigValueFactory, ConfigFactory} import cromwell.backend.BackendConfigurationDescriptor import org.scalatest.prop.TableDrivenPropertyChecks -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} -class JesConfigurationSpec extends FlatSpec with Matchers with TableDrivenPropertyChecks { +class JesConfigurationSpec extends FlatSpec with Matchers with TableDrivenPropertyChecks with BeforeAndAfterAll { behavior of "JesConfigurationSpec" + val mockFile = File.newTemporaryFile() + + override def afterAll(): Unit = { + mockFile.delete(true) + + () + } + val globalConfig = ConfigFactory.parseString( - """ + s""" |google { | | application-name = "cromwell" @@ -24,13 +33,13 @@ class JesConfigurationSpec extends FlatSpec with Matchers with TableDrivenProper | name = "user-via-refresh" | scheme = "refresh_token" | client-id = "secret_id" - | client-secret = "secret_secret" + | client-secret = "${mockFile.pathAsString}" | }, | { | name = "service-account" | scheme = "service_account" | service-account-id = "my-service-account" - | pem-file = "/path/to/file.pem" + | pem-file = "${mockFile.pathAsString}" | } | ] |} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala 
b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala index 4699402bc..73b8cb6de 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesInitializationActorSpec.scala @@ -7,14 +7,16 @@ import com.typesafe.config.{Config, ConfigFactory} import cromwell.backend.BackendWorkflowInitializationActor.{InitializationFailed, InitializationSuccess, Initialize} import cromwell.backend.impl.jes.authentication.GcsLocalizing import cromwell.backend.{BackendConfigurationDescriptor, BackendSpec, BackendWorkflowDescriptor} +import cromwell.core.Tags.IntegrationTest import cromwell.core.logging.LoggingTest._ import cromwell.core.{TestKitSuite, WorkflowOptions} -import cromwell.filesystems.gcs.{RefreshTokenMode, SimpleClientSecrets} +import cromwell.filesystems.gcs.GoogleConfiguration +import cromwell.filesystems.gcs.auth.{RefreshTokenMode, SimpleClientSecrets} import cromwell.util.{EncryptionSpec, SampleWdl} import org.scalatest.{FlatSpecLike, Matchers} import org.specs2.mock.Mockito import spray.json._ -import wdl4s.Call +import wdl4s.TaskCall import scala.concurrent.duration._ @@ -38,7 +40,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -136,17 +138,17 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe val refreshTokenConfig = ConfigFactory.parseString(refreshTokenConfigTemplate) - private def getJesBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], conf: BackendConfigurationDescriptor) = { + private def getJesBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], conf: BackendConfigurationDescriptor) = { system.actorOf(JesInitializationActor.props(workflowDescriptor, calls, new JesConfiguration(conf), 
emptyActor)) } behavior of "JesInitializationActor" - it should "log a warning message when there are unsupported runtime attributes" in { + it should "log a warning message when there are unsupported runtime attributes" taggedAs IntegrationTest in { within(Timeout) { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { docker: "ubuntu/latest" test: true }""") - val backend = getJesBackend(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, + val backend = getJesBackend(workflowDescriptor, workflowDescriptor.workflow.taskCalls, defaultBackendConfig) val eventPattern = "Key/s [test] is/are not supported by JesBackend. Unsupported attributes will not be part of jobs executions." @@ -163,7 +165,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe it should "return InitializationFailed when docker runtime attribute key is not present" in { within(Timeout) { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { }""") - val backend = getJesBackend(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, + val backend = getJesBackend(workflowDescriptor, workflowDescriptor.workflow.taskCalls, defaultBackendConfig) backend ! 
Initialize expectMsgPF() { @@ -182,7 +184,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe private def buildJesInitializationTestingBits(backendConfig: Config = dockerBackendConfig): TestingBits = { val workflowOptions = WorkflowOptions.fromMap(Map("refresh_token" -> "mytoken")).get val workflowDescriptor = buildWorkflowDescriptor(SampleWdl.HelloWorld.wdlSource(), options = workflowOptions) - val calls = workflowDescriptor.workflowNamespace.workflow.calls + val calls = workflowDescriptor.workflow.taskCalls val backendConfigurationDescriptor = BackendConfigurationDescriptor(backendConfig, globalConfig) val jesConfiguration = new JesConfiguration(backendConfigurationDescriptor) @@ -197,7 +199,7 @@ class JesInitializationActorSpec extends TestKitSuite("JesInitializationActorSpe val TestingBits(actorRef, _) = buildJesInitializationTestingBits(refreshTokenConfig) val actor = actorRef.underlyingActor - actor.refreshTokenAuth should be(Some(GcsLocalizing(RefreshTokenMode("user-via-refresh", "secret_id", "secret_secret"), "mytoken"))) + actor.refreshTokenAuth should be(Some(GcsLocalizing(RefreshTokenMode("user-via-refresh", "secret_id", "secret_secret", GoogleConfiguration.GoogleScopes), "mytoken"))) } it should "generate the correct json content for no docker token and no refresh token" in { diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala new file mode 100644 index 000000000..2c0853718 --- /dev/null +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesJobExecutionActorSpec.scala @@ -0,0 +1,111 @@ +package cromwell.backend.impl.jes + +import akka.actor.{Actor, ActorRef, Props} +import akka.testkit.{TestActorRef, TestProbe} +import cromwell.backend.BackendJobDescriptor +import cromwell.core.TestKitSuite +import org.scalatest.{FlatSpecLike, Matchers} +import 
org.specs2.mock.Mockito + +import scala.concurrent.duration._ +import akka.testkit._ +import cromwell.backend.BackendJobExecutionActor.{ExecuteJobCommand, JobFailedNonRetryableResponse} +import cromwell.backend.impl.jes.ControllableFailingJabjea.JabjeaExplode + +import scala.concurrent.{ExecutionContext, Promise} + +class JesJobExecutionActorSpec extends TestKitSuite("JesJobExecutionActorSpec") with FlatSpecLike with Matchers with Mockito { + + behavior of "JesJobExecutionActor" + + private val AwaitAlmostNothing = 100.milliseconds.dilated + private val TimeoutDuration = 10.seconds.dilated + implicit val ec: ExecutionContext = system.dispatcher + + it should "catch failures in JABJEA initialization and fail the job accordingly" in { + val jobDescriptor = mock[BackendJobDescriptor] + val jesWorkflowInfo = mock[JesConfiguration] + val initializationData = mock[JesBackendInitializationData] + val serviceRegistryActor = system.actorOf(Props.empty) + val jesBackendSingletonActor = Option(system.actorOf(Props.empty)) + + val parent = TestProbe() + val deathwatch = TestProbe() + val testJJEA = TestActorRef[TestJesJobExecutionActor]( + props = Props(new TestJesJobExecutionActor(jobDescriptor, jesWorkflowInfo, initializationData, serviceRegistryActor, jesBackendSingletonActor, Props(new ConstructorFailingJABJEA))), + supervisor = parent.ref) + deathwatch watch testJJEA + + // Nothing happens: + parent.expectNoMsg(max = AwaitAlmostNothing) + deathwatch.expectNoMsg(max = AwaitAlmostNothing) + + testJJEA.tell(msg = ExecuteJobCommand, sender = parent.ref) + + parent.expectMsgPF(max = TimeoutDuration) { + case JobFailedNonRetryableResponse(jobKey, e, errorCode) => + e.getMessage should be("JesAsyncBackendJobExecutionActor failed and didn't catch its exception.") + } + } + + it should "catch failures at a random point during JABJEA processing and fail the job accordingly" in { + val jobDescriptor = mock[BackendJobDescriptor] + val jesWorkflowInfo = mock[JesConfiguration] + val 
initializationData = mock[JesBackendInitializationData] + val serviceRegistryActor = system.actorOf(Props.empty) + val jesBackendSingletonActor = Option(system.actorOf(Props.empty)) + + val parent = TestProbe() + val deathwatch = TestProbe() + val jabjeaConstructionPromise = Promise[ActorRef]() + val testJJEA = TestActorRef[TestJesJobExecutionActor]( + props = Props(new TestJesJobExecutionActor(jobDescriptor, jesWorkflowInfo, initializationData, serviceRegistryActor, jesBackendSingletonActor, Props(new ControllableFailingJabjea(jabjeaConstructionPromise)))), + supervisor = parent.ref) + deathwatch watch testJJEA + + // Nothing happens: + parent.expectNoMsg(max = AwaitAlmostNothing) + deathwatch.expectNoMsg(max = AwaitAlmostNothing) + + testJJEA.tell(msg = ExecuteJobCommand, sender = parent.ref) + + // Wait for the JABJEA to be spawned. Then kill it: + parent.expectNoMsg(max = AwaitAlmostNothing) + deathwatch.expectNoMsg(max = AwaitAlmostNothing) + jabjeaConstructionPromise.future foreach { _ ! 
JabjeaExplode } + + parent.expectMsgPF(max = TimeoutDuration) { + case JobFailedNonRetryableResponse(jobKey, e, errorCode) => + e.getMessage should be("JesAsyncBackendJobExecutionActor failed and didn't catch its exception.") + } + } +} + +class TestJesJobExecutionActor(jobDescriptor: BackendJobDescriptor, + jesWorkflowInfo: JesConfiguration, + initializationData: JesBackendInitializationData, + serviceRegistryActor: ActorRef, + jesBackendSingletonActor: Option[ActorRef], + fakeJabjeaProps: Props) extends JesJobExecutionActor(jobDescriptor, jesWorkflowInfo, initializationData, serviceRegistryActor, jesBackendSingletonActor) { + override def jabjeaProps: Props = fakeJabjeaProps +} + +class ConstructorFailingJABJEA extends ControllableFailingJabjea(Promise[ActorRef]()) { + // Explode immediately in the constructor: + explode() +} + +class ControllableFailingJabjea(constructionPromise: Promise[ActorRef]) extends Actor { + def explode() = { + val boom = 1 == 1 + if (boom) throw new RuntimeException("Test Exception! 
Don't panic if this appears during a test run!") + } + constructionPromise.trySuccess(self) + override def receive = { + case JabjeaExplode => explode() + } +} + +object ControllableFailingJabjea { + case object JabjeaExplode +} diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesTestConfig.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesTestConfig.scala index 03f17d65b..25f061387 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesTestConfig.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesTestConfig.scala @@ -48,5 +48,5 @@ object JesTestConfig { val JesBackendConfig = ConfigFactory.parseString(JesBackendConfigString) val JesGlobalConfig = ConfigFactory.parseString(JesGlobalConfigString) - val JesBackendConfigurationDescriptor = new BackendConfigurationDescriptor(JesBackendConfig, JesGlobalConfig) + val JesBackendConfigurationDescriptor = BackendConfigurationDescriptor(JesBackendConfig, JesGlobalConfig) } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesWorkflowPathsSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesWorkflowPathsSpec.scala index 3f1dea365..48dd3d74c 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesWorkflowPathsSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/JesWorkflowPathsSpec.scala @@ -1,12 +1,12 @@ package cromwell.backend.impl.jes import cromwell.backend.BackendSpec +import cromwell.core.TestKitSuite import cromwell.util.SampleWdl -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.{FlatSpecLike, Matchers} import org.specs2.mock.Mockito -import cromwell.backend.impl.jes.MockObjects._ -class JesWorkflowPathsSpec extends FlatSpec with Matchers with Mockito { +class JesWorkflowPathsSpec extends TestKitSuite with FlatSpecLike with Matchers with Mockito { import BackendSpec._ import JesTestConfig._ @@ -16,11 
+16,11 @@ class JesWorkflowPathsSpec extends FlatSpec with Matchers with Mockito { val workflowDescriptor = buildWorkflowDescriptor(SampleWdl.HelloWorld.wdlSource()) val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) - val workflowPaths = JesWorkflowPaths(workflowDescriptor, jesConfiguration, mockCredentials)(scala.concurrent.ExecutionContext.global) - workflowPaths.rootPath.toString should be("gs://my-cromwell-workflows-bucket") - workflowPaths.workflowRootPath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}") - workflowPaths.gcsAuthFilePath.toString should - be(s"gs://my-cromwell-workflows-bucket/hello/${workflowDescriptor.id}/${workflowDescriptor.id}_auth.json") + val workflowPaths = JesWorkflowPaths(workflowDescriptor, jesConfiguration)(system) + workflowPaths.executionRoot.toUri.toString should be("gs://my-cromwell-workflows-bucket/") + workflowPaths.workflowRoot.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/") + workflowPaths.gcsAuthFilePath.toUri.toString should + be(s"gs://my-cromwell-workflows-bucket/wf_hello/${workflowDescriptor.id}/${workflowDescriptor.id}_auth.json") } } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/MockObjects.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/MockObjects.scala deleted file mode 100644 index 1cde38c47..000000000 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/MockObjects.scala +++ /dev/null @@ -1,9 +0,0 @@ -package cromwell.backend.impl.jes - -import com.google.api.client.googleapis.testing.auth.oauth2.MockGoogleCredential -import cromwell.backend.impl.jes.authentication.JesCredentials - -object MockObjects { - val mockCredential = new MockGoogleCredential.Builder().build() - val mockCredentials = JesCredentials(mockCredential, mockCredential) -} diff --git 
a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/RunSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/RunSpec.scala index 39430abb2..5398ed66c 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/RunSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/RunSpec.scala @@ -3,13 +3,11 @@ package cromwell.backend.impl.jes import java.time.OffsetDateTime import java.util -import com.google.api.client.googleapis.testing.auth.oauth2.MockGoogleCredential import com.google.api.client.util.ArrayMap -import com.google.api.services.genomics.Genomics import com.google.api.services.genomics.model.Operation +import cromwell.core.ExecutionEvent import org.scalatest.{FlatSpec, Matchers} import org.specs2.mock.{Mockito => MockitoTrait} -import cromwell.core.ExecutionEvent import scala.collection.JavaConverters._ @@ -36,8 +34,6 @@ class RunSpec extends FlatSpec with Matchers with MockitoTrait { op.setMetadata(metadata.asJava) - val mockedCredentials = new MockGoogleCredential.Builder().build() - val genomics = new Genomics(mockedCredentials.getTransport, mockedCredentials.getJsonFactory, mockedCredentials) val list = Run.getEventList(op) list should contain theSameElementsAs List( ExecutionEvent("waiting for quota", OffsetDateTime.parse("2015-12-05T00:00:00+00:00")), diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManagerSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManagerSpec.scala index 1eaa42297..c7434419a 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManagerSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesApiQueryManagerSpec.scala @@ -2,7 +2,7 @@ package cromwell.backend.impl.jes.statuspolling import akka.actor.{ActorRef, Props} import akka.testkit.{TestActorRef, TestProbe} -import 
cromwell.backend.impl.jes.Run +import cromwell.backend.impl.jes.{JesConfiguration, Run} import cromwell.core.TestKitSuite import org.scalatest.{FlatSpecLike, Matchers} @@ -65,49 +65,35 @@ class JesApiQueryManagerSpec extends TestKitSuite("JesApiQueryManagerSpec") with } AkkaTestUtil.actorDeathMethods(system) foreach { case (name, stopMethod) => - it should s"catch polling actors if they $name and then recreate them" in { - + /* + This test creates two statusPoller ActorRefs which are handed to the TestJesApiQueryManager. Work is added to that query + manager and then the first statusPoller requests work and is subsequently killed. The expectation is that: + + - The work will return to the workQueue of the query manager + - The query manager will have registered a new statusPoller + - That statusPoller is the second ActorRef (and artifact of TestJesApiQueryManager) + */ + it should s"catch polling actors if they $name, recreate them and add work back to the queue" in { val statusPoller1 = TestActorRef(Props(new AkkaTestUtil.DeathTestActor()), TestActorRef(new AkkaTestUtil.StoppingSupervisor())) - val statusPoller2 = TestActorRef(Props(new AkkaTestUtil.DeathTestActor())) + val statusPoller2 = TestActorRef(Props(new AkkaTestUtil.DeathTestActor()), TestActorRef(new AkkaTestUtil.StoppingSupervisor())) val jaqmActor: TestActorRef[TestJesApiQueryManager] = TestActorRef(TestJesApiQueryManager.props(statusPoller1, statusPoller2)) - val statusRequesters = ((0 until BatchSize * 2) map { i => i -> TestProbe(name = s"StatusRequester_$i") }).toMap + val emptyActor = system.actorOf(Props.empty) - // Send a few status poll requests: + // Send a few status poll requests: BatchSize indexedTimes { index => - val probe = statusRequesters(index) - jaqmActor.tell(msg = JesApiQueryManager.DoPoll(Run(index.toString, null)), sender = probe.ref) - } - BatchSize indexedTimes { i => - val index = i + BatchSize // For the second half of the statusRequester set - val probe = 
statusRequesters(index) - jaqmActor.tell(msg = JesApiQueryManager.DoPoll(Run(index.toString, null)), sender = probe.ref) + jaqmActor.tell(msg = JesApiQueryManager.DoPoll(Run(index.toString, null)), sender = emptyActor) } - // Request a set of work from the middle of the queue: - val batchOffset = 2 - jaqmActor.tell(msg = JesApiQueryManager.RequestJesPollingWork(batchOffset), sender = statusPoller1) jaqmActor.tell(msg = JesApiQueryManager.RequestJesPollingWork(BatchSize), sender = statusPoller1) - // Kill the original status poller: stopMethod(statusPoller1) - // Only the appropriate requesters get an error: - (0 until batchOffset) foreach { index => - val probe = statusRequesters(index) - probe.expectNoMsg(max = AwaitAlmostNothing) - } - (batchOffset until batchOffset + BatchSize) foreach { index => - val probe = statusRequesters(index) - probe.expectMsg(max = TestExecutionTimeout, hint = s"Polling error to requester #$index", obj = JesPollingActor.JesPollError) - } - (batchOffset + BatchSize until 2 * BatchSize) foreach { index => - val probe = statusRequesters(index) - probe.expectNoMsg(max = AwaitAlmostNothing) + eventually { + jaqmActor.underlyingActor.testPollerCreations should be (2) + jaqmActor.underlyingActor.queueSize should be (BatchSize) + jaqmActor.underlyingActor.statusPollerEquals(statusPoller2) should be (true) } - - // Check the next status poller gets created: - eventually { jaqmActor.underlyingActor.testPollerCreations should be(2) } } } } @@ -122,8 +108,7 @@ object JesApiQueryManagerSpec { /** * This test class allows us to hook into the JesApiQueryManager's makeStatusPoller and provide our own TestProbes instead */ -class TestJesApiQueryManager(statusPollerProbes: ActorRef*) extends JesApiQueryManager { - +class TestJesApiQueryManager(qps: Int, statusPollerProbes: ActorRef*) extends JesApiQueryManager(qps) { var testProbes: Queue[ActorRef] = _ var testPollerCreations: Int = _ @@ -133,7 +118,7 @@ class TestJesApiQueryManager(statusPollerProbes: 
ActorRef*) extends JesApiQueryM } override private[statuspolling] def makeStatusPoller(): ActorRef = { - // Initialise the queue, if necessary: + // Initialize the queue, if necessary: if (testProbes == null) { init() } @@ -146,8 +131,14 @@ class TestJesApiQueryManager(statusPollerProbes: ActorRef*) extends JesApiQueryM testProbes = newQueue probe } + + def queueSize = workQueue.size + def statusPollerEquals(otherStatusPoller: ActorRef) = statusPoller == otherStatusPoller } object TestJesApiQueryManager { - def props(statusPollers: ActorRef*): Props = Props(new TestJesApiQueryManager(statusPollers: _*)) + import cromwell.backend.impl.jes.JesTestConfig.JesBackendConfigurationDescriptor + val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) + + def props(statusPollers: ActorRef*): Props = Props(new TestJesApiQueryManager(jesConfiguration.qps, statusPollers: _*)) } diff --git a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorSpec.scala b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorSpec.scala index b861cbf0f..aa29b0df3 100644 --- a/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorSpec.scala +++ b/supportedBackends/jes/src/test/scala/cromwell/backend/impl/jes/statuspolling/JesPollingActorSpec.scala @@ -13,7 +13,7 @@ import com.google.api.client.googleapis.batch.BatchRequest import com.google.api.client.googleapis.batch.json.JsonBatchCallback import com.google.api.client.googleapis.json.GoogleJsonError import com.google.api.services.genomics.model.Operation -import cromwell.backend.impl.jes.{Run, RunStatus} +import cromwell.backend.impl.jes.{JesConfiguration, Run, RunStatus} import cromwell.backend.impl.jes.statuspolling.JesApiQueryManager.JesStatusPollQuery import cromwell.backend.impl.jes.statuspolling.JesPollingActor.JesPollFailed import 
cromwell.backend.impl.jes.statuspolling.TestJesPollingActor.{CallbackFailure, CallbackSuccess, JesBatchCallbackResponse} @@ -29,9 +29,17 @@ class JesPollingActorSpec extends TestKitSuite("JesPollingActor") with FlatSpecL implicit val DefaultPatienceConfig = PatienceConfig(TestExecutionTimeout) val AwaitAlmostNothing = 30.milliseconds.dilated + import cromwell.backend.impl.jes.JesTestConfig.JesBackendConfigurationDescriptor + val jesConfiguration = new JesConfiguration(JesBackendConfigurationDescriptor) + var managerProbe: TestProbe = _ var jpActor: TestActorRef[TestJesPollingActor] = _ + it should "correctly calculate batch intervals" in { + JesPollingActor.determineBatchInterval(10) shouldBe 9.seconds + JesPollingActor.determineBatchInterval(100) shouldBe 1.second + } + it should "query for work and wait for a reply" in { managerProbe.expectMsgClass(max = TestExecutionTimeout, c = classOf[JesApiQueryManager.RequestJesPollingWork]) managerProbe.expectNoMsg(max = AwaitAlmostNothing) @@ -77,7 +85,7 @@ class JesPollingActorSpec extends TestKitSuite("JesPollingActor") with FlatSpecL before { managerProbe = TestProbe() - jpActor = TestActorRef(TestJesPollingActor.props(managerProbe.ref), managerProbe.ref) + jpActor = TestActorRef(TestJesPollingActor.props(managerProbe.ref, jesConfiguration), managerProbe.ref) } } @@ -94,8 +102,9 @@ object JesPollingActorSpec extends Mockito { * - Mocks out the methods which actually call out to JES, and allows the callbacks to be triggered in a testable way * - Also waits a **lot** less time before polls! 
*/ -class TestJesPollingActor(manager: ActorRef) extends JesPollingActor(manager) with Mockito { - override val BatchInterval = 10.milliseconds +class TestJesPollingActor(manager: ActorRef, qps: Int) extends JesPollingActor(manager, qps) with Mockito { + + override lazy val batchInterval = 10.milliseconds var operationStatusResponses: Queue[RunStatus] = Queue.empty var resultHandlers: Queue[JsonBatchCallback[Operation]] = Queue.empty @@ -123,7 +132,7 @@ class TestJesPollingActor(manager: ActorRef) extends JesPollingActor(manager) wi } object TestJesPollingActor { - def props(manager: ActorRef) = Props(new TestJesPollingActor(manager)) + def props(manager: ActorRef, jesConfiguration: JesConfiguration) = Props(new TestJesPollingActor(manager, jesConfiguration.qps)) sealed trait JesBatchCallbackResponse case object CallbackSuccess extends JesBatchCallbackResponse diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigAsyncJobExecutionActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigAsyncJobExecutionActor.scala index ad9761bca..a47209c20 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigAsyncJobExecutionActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigAsyncJobExecutionActor.scala @@ -48,15 +48,20 @@ sealed trait ConfigAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecut * @param taskName The name of the task to retrieve from the precomputed wdl namespace. * @param inputs The customized inputs to this task. */ - def writeTaskScript(script: File, taskName: String, inputs: CallInputs): Unit = { + def writeTaskScript(script: File, taskName: String, inputs: WorkflowCoercedInputs): Unit = { val task = configInitializationData.wdlNamespace.findTask(taskName). 
getOrElse(throw new RuntimeException(s"Unable to find task $taskName")) - val command = task.instantiateCommand(inputs, NoFunctions).get + val inputsWithFqns = inputs map { case (k, v) => s"$taskName.$k" -> v } + val command = task.instantiateCommand(task.inputsFromMap(inputsWithFqns), NoFunctions).get jobLogger.info(s"executing: $command") - script.write( - s"""|#!/bin/bash - |$command - |""".stripMargin) + val scriptBody = + s""" + +#!/bin/bash +$command + +""".trim + "\n" + script.write(scriptBody) () } @@ -64,7 +69,7 @@ sealed trait ConfigAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecut * The inputs that are not specified by the config, that will be passed into a command for both submit and * submit-docker. */ - private lazy val standardInputs: CallInputs = { + private lazy val standardInputs: WorkflowCoercedInputs = { Map( JobNameInput -> WdlString(jobName), CwdInput -> WdlString(jobPaths.callRoot.toString), @@ -77,7 +82,7 @@ sealed trait ConfigAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecut /** * Extra arguments if this is a submit-docker command, or Map.empty. */ - private lazy val dockerInputs: CallInputs = { + private lazy val dockerInputs: WorkflowCoercedInputs = { if (isDockerRun) { Map( DockerCwdInput -> WdlString(jobPaths.callDockerRoot.toString) @@ -91,7 +96,7 @@ sealed trait ConfigAsyncJobExecutionActor extends SharedFileSystemAsyncJobExecut * The arguments generated from the backend config's list of attributes. These will include things like CPU, memory, * and other custom arguments like "backend_queue_name", "backend_billing_project", etc. 
*/ - private lazy val runtimeAttributeInputs: CallInputs = { + private lazy val runtimeAttributeInputs: WorkflowCoercedInputs = { val declarationValidations = configInitializationData.declarationValidations val inputOptions = declarationValidations map { declarationValidation => declarationValidation.extractWdlValueOption(validatedRuntimeAttributes) map { wdlValue => diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala index 0fc377aac..f8ff92cfd 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigBackendFileHashing.scala @@ -3,13 +3,21 @@ package cromwell.backend.impl.sfs.config import akka.event.LoggingAdapter import better.files._ import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest +import cromwell.core.path.DefaultPathBuilder import cromwell.util.TryWithResource._ +import scala.language.postfixOps import scala.util.Try private[config] object ConfigBackendFileHashing { - def getMd5Result(request: SingleFileHashRequest, log: LoggingAdapter): Try[String] = - tryWithResource(() => File(request.file.valueString).newInputStream) { inputStream => + def getMd5Result(request: SingleFileHashRequest, log: LoggingAdapter): Try[String] ={ + val path = DefaultPathBuilder.build(request.file.valueString) recover { + case failure => throw new RuntimeException("Failed to construct path to hash", failure) + } get + + tryWithResource(() => File(path).newInputStream) { inputStream => org.apache.commons.codec.digest.DigestUtils.md5Hex(inputStream) } + } + } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala 
b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala index 6261e65c9..f453cf315 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategy.scala @@ -4,13 +4,15 @@ import akka.event.LoggingAdapter import better.files.File import com.typesafe.config.Config import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest +import cromwell.backend.sfs.SharedFileSystemBackendInitializationData +import cromwell.core.path.PathFactory import cromwell.util.TryWithResource._ import cromwell.util.FileUtil._ import net.ceedubs.ficus.Ficus._ import org.apache.commons.codec.digest.DigestUtils import org.slf4j.LoggerFactory -import scala.util.Try +import scala.util.{Failure, Try} object ConfigHashingStrategy { val logger = LoggerFactory.getLogger(getClass) @@ -37,14 +39,22 @@ abstract class ConfigHashingStrategy { protected lazy val checkSiblingMessage = if (checkSiblingMd5) "Check first for sibling md5 and if not found " else "" def getHash(request: SingleFileHashRequest, log: LoggingAdapter): Try[String] = { - val file = File(request.file.valueString).followSymlinks + def usingSFSInitData(initData: SharedFileSystemBackendInitializationData) = { + val pathBuilders = initData.workflowPaths.pathBuilders + val file = PathFactory.buildFile(request.file.valueString, pathBuilders).followSymlinks - if (checkSiblingMd5) { - precomputedMd5(file) match { - case Some(md5) => Try(md5.contentAsString) - case None => hash(file) - } - } else hash(file) + if (checkSiblingMd5) { + precomputedMd5(file) match { + case Some(md5) => Try(md5.contentAsString) + case None => hash(file) + } + } else hash(file) + } + + request.initializationData match { + case Some(initData: SharedFileSystemBackendInitializationData) => usingSFSInitData(initData) + case _ => Failure(new IllegalArgumentException("Need 
SharedFileSystemBackendInitializationData to calculate hash.")) + } } private def precomputedMd5(file: File): Option[File] = { diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigWdlNamespace.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigWdlNamespace.scala index cb56e35a0..161768da1 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigWdlNamespace.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/ConfigWdlNamespace.scala @@ -44,7 +44,7 @@ class ConfigWdlNamespace(backendConfig: Config) { */ val wdlNamespace = { try { - WdlNamespace.load(wdlSource) + WdlNamespace.loadUsingSource(wdlSource, None, None) } catch { case exception: Exception => throw new RuntimeException(s"Error parsing generated wdl:\n$wdlSource".stripMargin, exception) @@ -74,7 +74,7 @@ object ConfigWdlNamespace { private def makeTask(taskName: String, command: String, declarations: String): Task = { val wdlSource = makeWdlSource(taskName, command, declarations) - val wdlNamespace = WdlNamespace.load(wdlSource) + val wdlNamespace = WdlNamespace.loadUsingSource(wdlSource, None, None) wdlNamespace.findTask(taskName).getOrElse(throw new RuntimeException(s"Couldn't find task $taskName")) } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/DeclarationValidation.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/DeclarationValidation.scala index ec3aecbf7..37a42a5c3 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/DeclarationValidation.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/impl/sfs/config/DeclarationValidation.scala @@ -24,7 +24,7 @@ object DeclarationValidation { * @return The DeclarationValidation object for the declaration. 
*/ def fromDeclaration(declaration: Declaration): DeclarationValidation = { - declaration.name match { + declaration.unqualifiedName match { // Docker and CPU are special keys understood by cromwell. case DockerValidation.key => new DeclarationValidation(declaration, DockerValidation.instance) case CpuValidation.key => new DeclarationValidation(declaration, CpuValidation.default) @@ -34,11 +34,11 @@ object DeclarationValidation { // All other declarations must be a Boolean, Float, Integer, or String. case _ => val validator: PrimitiveRuntimeAttributesValidation[_] = declaration.wdlType match { - case WdlBooleanType => new BooleanRuntimeAttributesValidation(declaration.name) - case WdlFloatType => new FloatRuntimeAttributesValidation(declaration.name) - case WdlIntegerType => new IntRuntimeAttributesValidation(declaration.name) - case WdlStringType => new StringRuntimeAttributesValidation(declaration.name) - case other => throw new RuntimeException(s"Unsupported config runtime attribute $other ${declaration.name}") + case WdlBooleanType => new BooleanRuntimeAttributesValidation(declaration.unqualifiedName) + case WdlFloatType => new FloatRuntimeAttributesValidation(declaration.unqualifiedName) + case WdlIntegerType => new IntRuntimeAttributesValidation(declaration.unqualifiedName) + case WdlStringType => new StringRuntimeAttributesValidation(declaration.unqualifiedName) + case other => throw new RuntimeException(s"Unsupported config runtime attribute $other ${declaration.unqualifiedName}") } new DeclarationValidation(declaration, validator) } @@ -52,7 +52,7 @@ object DeclarationValidation { * @param instanceValidation A basic instance validation for the declaration. 
*/ class DeclarationValidation(declaration: Declaration, instanceValidation: RuntimeAttributesValidation[_]) { - val key = declaration.name + val key = declaration.unqualifiedName /** * Creates a validation, by adding on defaults if they're specified in the declaration, and then making the @@ -74,7 +74,7 @@ class DeclarationValidation(declaration: Declaration, instanceValidation: Runtim val validationDefault = if (declaration.expression.isDefined) default(instanceValidation, declaration.expression.get) else instanceValidation - if (declaration.postfixQuantifier.contains("?")) validationDefault.optional else validationDefault + if (declaration.wdlType.isInstanceOf[WdlOptionalType]) validationDefault.optional else validationDefault } /** @@ -151,7 +151,7 @@ class MemoryDeclarationValidation(declaration: Declaration) } private lazy val declarationMemoryUnit: MemoryUnit = { - val suffix = memoryUnitSuffix(declaration.name) + val suffix = memoryUnitSuffix(declaration.unqualifiedName) val memoryUnitOption = MemoryUnit.values.find(_.suffixes.map(_.toLowerCase).contains(suffix.toLowerCase)) memoryUnitOption match { case Some(memoryUnit) => memoryUnit diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/GcsWorkflowFileSystemProvider.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/GcsWorkflowFileSystemProvider.scala deleted file mode 100644 index d96140014..000000000 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/GcsWorkflowFileSystemProvider.scala +++ /dev/null @@ -1,36 +0,0 @@ -package cromwell.backend.sfs - -import cats.data.Validated.{Invalid, Valid} -import cromwell.backend.wfs.{WorkflowFileSystemProvider, WorkflowFileSystemProviderParams} -import cromwell.filesystems.gcs.GoogleAuthMode.GoogleAuthOptions -import cromwell.filesystems.gcs.{GcsFileSystem, GcsFileSystemProvider, GoogleConfiguration} -import net.ceedubs.ficus.Ficus._ -import wdl4s.ValidationException - -import scala.util.Try - -object 
GcsWorkflowFileSystemProvider extends WorkflowFileSystemProvider { - override def fileSystemOption(params: WorkflowFileSystemProviderParams): Option[GcsFileSystem] = { - params.fileSystemConfig.as[Option[String]]("gcs.auth") map gcsFileSystem(params) - } - - private def gcsFileSystem(params: WorkflowFileSystemProviderParams)(gcsAuthName: String): GcsFileSystem = { - val workflowOptions = params.workflowOptions - val globalConfig = params.globalConfig - val googleConfig = GoogleConfiguration(globalConfig) - val googleAuthModeValidation = googleConfig.auth(gcsAuthName) - - val gcsAuthMode = googleAuthModeValidation match { - case Valid(googleAuthMode) => googleAuthMode - case Invalid(errors) => - throw new ValidationException("Could not create gcs filesystem from configuration", errors) - } - - val authOptions = new GoogleAuthOptions { - override def get(key: String): Try[String] = workflowOptions.get(key) - } - - val storage = gcsAuthMode.buildStorage(authOptions, googleConfig.applicationName) - GcsFileSystem(GcsFileSystemProvider(storage)(params.fileSystemExecutionContext)) - } -} diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala index 20c8c3d39..188391c45 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystem.scala @@ -1,22 +1,24 @@ package cromwell.backend.sfs -import java.nio.file.{FileSystem, Path, Paths} +import java.nio.file.{Path, Paths} import cats.instances.try_._ import cats.syntax.functor._ import com.typesafe.config.Config +import com.typesafe.scalalogging.StrictLogging import cromwell.backend.io.JobPaths import cromwell.core._ -import wdl4s.CallInputs +import cromwell.core.path.PathFactory +import cromwell.util.TryUtil +import wdl4s.EvaluatedTaskInputs import wdl4s.types.{WdlArrayType, WdlMapType} -import 
wdl4s.util.TryUtil import wdl4s.values._ import scala.collection.JavaConverters._ import scala.language.postfixOps import scala.util.{Failure, Success, Try} -object SharedFileSystem { +object SharedFileSystem extends StrictLogging { import better.files._ final case class AttemptedLookupResult(name: String, value: Try[WdlValue]) { @@ -42,31 +44,38 @@ object SharedFileSystem { } private def localizePathViaCopy(originalPath: File, executionPath: File): Try[Unit] = { - executionPath.parent.createDirectories() - val executionTmpPath = pathPlusSuffix(executionPath, ".tmp") - Try(originalPath.copyTo(executionTmpPath, overwrite = true).moveTo(executionPath, overwrite = true)).void + val action = Try { + executionPath.parent.createDirectories() + val executionTmpPath = pathPlusSuffix(executionPath, ".tmp") + originalPath.copyTo(executionTmpPath, overwrite = true).moveTo(executionPath, overwrite = true) + }.void + logOnFailure(action, "copy") } private def localizePathViaHardLink(originalPath: File, executionPath: File): Try[Unit] = { - executionPath.parent.createDirectories() - // link.linkTo(target) returns target, - // however we want to return the link, not the target, so map the result back to executionPath - - // -Ywarn-value-discard - // Try(executionPath.linkTo(originalPath, symbolic = false)) map { _ => executionPath } - Try { executionPath.linkTo(originalPath, symbolic = false) } void + val action = Try { + executionPath.parent.createDirectories() + executionPath.linkTo(originalPath, symbolic = false) + }.void + logOnFailure(action, "hard link") } private def localizePathViaSymbolicLink(originalPath: File, executionPath: File): Try[Unit] = { if (originalPath.isDirectory) Failure(new UnsupportedOperationException("Cannot localize directory with symbolic links")) else { - executionPath.parent.createDirectories() - // -Ywarn-value-discard - // Try(executionPath.linkTo(originalPath, symbolic = true)) map { _ => executionPath } - Try { executionPath.linkTo(originalPath, 
symbolic = true) } void + val action = Try { + executionPath.parent.createDirectories() + executionPath.linkTo(originalPath, symbolic = true) + }.void + logOnFailure(action, "symbolic link") } } + private def logOnFailure(action: Try[Unit], actionLabel: String): Try[Unit] = { + if (action.isFailure) logger.warn(s"Localization via $actionLabel has failed: ${action.failed.get.getMessage}") + action + } + private def duplicate(description: String, source: File, dest: File, strategies: Stream[DuplicationStrategy]) = { import cromwell.util.FileUtil._ @@ -134,6 +143,9 @@ trait SharedFileSystem extends PathFactory { case array: WdlArray => val mappedArray = array.value map outputMapper(job) TryUtil.sequence(mappedArray) map { WdlArray(array.wdlType, _) } + case map: WdlMap => + val mappedMap = map.value mapValues outputMapper(job) + TryUtil.sequenceMap(mappedMap) map { WdlMap(map.wdlType, _) } case other => Success(other) } } @@ -145,11 +157,8 @@ trait SharedFileSystem extends PathFactory { /** * Return a possibly altered copy of inputs reflecting any localization of input file paths that might have * been performed for this `Backend` implementation. - * NOTE: This ends up being a backdoor implementation of Backend.adjustInputPaths as both LocalBackend and SgeBackend - * end up with this implementation and thus use it to satisfy their contract with Backend. 
- * This is yuck-tastic and I consider this a FIXME, but not for this refactor */ - def localizeInputs(inputsRoot: Path, docker: Boolean, filesystems: List[FileSystem], inputs: CallInputs): Try[CallInputs] = { + def localizeInputs(inputsRoot: Path, docker: Boolean)(inputs: EvaluatedTaskInputs): Try[EvaluatedTaskInputs] = { val strategies = if (docker) DockerLocalizers else Localizers // Use URI to identify protocol scheme and strip it out @@ -161,12 +170,13 @@ trait SharedFileSystem extends PathFactory { host map { h => Paths.get(h, uriPath) } getOrElse Paths.get(uriPath) } - /** + /* * Transform an original input path to a path in the call directory. * The new path matches the original path, it only "moves" the root to be the call directory. */ + def toCallPath(path: String): Try[PairOfFiles] = Try { - val src = buildFile(path, filesystems) + val src = buildFile(path) // Strip out potential prefix protocol val localInputPath = stripProtocolScheme(src.path) val dest = if (File(inputsRoot).isParentOf(localInputPath)) File(localInputPath) @@ -181,7 +191,7 @@ trait SharedFileSystem extends PathFactory { // Optional function to adjust the path to "docker path" if the call runs in docker val localizeFunction = localizeWdlValue(toCallPath, strategies.toStream) _ val localizedValues = inputs.toSeq map { - case (name, value) => localizeFunction(value) map { name -> _ } + case (declaration, value) => localizeFunction(value) map { declaration -> _ } } TryUtil.sequence(localizedValues, "Failures during localization").map(_.toMap) recover { diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemAsyncJobExecutionActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemAsyncJobExecutionActor.scala index 3013b12af..ef5ef5e89 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemAsyncJobExecutionActor.scala +++ 
b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemAsyncJobExecutionActor.scala @@ -1,6 +1,6 @@ package cromwell.backend.sfs -import java.nio.file.{FileAlreadyExistsException, Path, Paths} +import java.nio.file.{FileAlreadyExistsException, Path} import akka.actor.{Actor, ActorLogging, ActorRef} import akka.event.LoggingReceive @@ -12,9 +12,11 @@ import cromwell.backend.async.{AbortedExecutionHandle, AsyncBackendJobExecutionA import cromwell.backend.io.WorkflowPathsBackendInitializationData import cromwell.backend.sfs.SharedFileSystem._ import cromwell.backend.validation._ -import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor, OutputEvaluator} -import cromwell.core.JobOutputs +import cromwell.backend.wdl.{OutputEvaluator, Command} +import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor} +import cromwell.core.CallOutputs import cromwell.core.logging.JobLogging +import cromwell.core.path.DefaultPathBuilder import cromwell.core.retry.SimpleExponentialBackoff import cromwell.services.keyvalue.KeyValueServiceActor._ import wdl4s.values.{WdlArray, WdlFile, WdlMap, WdlValue} @@ -135,11 +137,13 @@ trait SharedFileSystemAsyncJobExecutionActor override lazy val backendInitializationDataOption = params.backendInitializationDataOption - def toDockerPath(path: WdlValue): WdlValue = { + def toUnixPath(docker: Boolean)(path: WdlValue): WdlValue = { path match { - case file: WdlFile => WdlFile(jobPaths.toDockerPath(Paths.get(path.valueString)).toString) - case array: WdlArray => WdlArray(array.wdlType, array.value map toDockerPath) - case map: WdlMap => WdlMap(map.wdlType, map.value mapValues toDockerPath) + case file: WdlFile => + val cleanPath = DefaultPathBuilder.build(path.valueString).get + WdlFile(if (docker) jobPaths.toDockerPath(cleanPath).toString else cleanPath.toString) + case array: WdlArray => WdlArray(array.wdlType, array.value map 
toUnixPath(docker)) + case map: WdlMap => WdlMap(map.wdlType, map.value mapValues toUnixPath(docker)) case wdlValue => wdlValue } } @@ -150,8 +154,8 @@ trait SharedFileSystemAsyncJobExecutionActor lazy val workflowDescriptor = jobDescriptor.workflowDescriptor lazy val call = jobDescriptor.key.call - lazy val fileSystems = WorkflowPathsBackendInitializationData.fileSystems(backendInitializationDataOption) - lazy val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, fileSystems) + lazy val pathBuilders = WorkflowPathsBackendInitializationData.pathBuilders(backendInitializationDataOption) + lazy val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, pathBuilders) override lazy val workflowId = jobDescriptor.workflowDescriptor.id override lazy val jobTag = jobDescriptor.key.tag @@ -165,12 +169,18 @@ trait SharedFileSystemAsyncJobExecutionActor } def instantiatedScript: String = { - val pathTransformFunction: WdlValue => WdlValue = if (isDockerRun) toDockerPath else identity - val tryCommand = sharedFileSystem.localizeInputs(jobPaths.callInputsRoot, - isDockerRun, fileSystems, jobDescriptor.inputs) flatMap { localizedInputs => - call.task.instantiateCommand(localizedInputs, callEngineFunction, pathTransformFunction) + val pathTransformFunction = toUnixPath(isDockerRun) _ + val localizer = sharedFileSystem.localizeInputs(jobPaths.callInputsRoot, isDockerRun) _ + + Command.instantiate( + jobDescriptor, + callEngineFunction, + localizer, + pathTransformFunction + ) match { + case Success(command) => command + case Failure(ex) => throw new RuntimeException("Failed to instantiate command line", ex) } - tryCommand.get } override def executeOrRecover(mode: ExecutionMode)(implicit ec: ExecutionContext) = { @@ -244,14 +254,19 @@ trait SharedFileSystemAsyncJobExecutionActor val rcPath = if (isDockerRun) jobPaths.toDockerPath(jobPaths.returnCode) else jobPaths.returnCode val rcTmpPath = s"$rcPath.tmp" - File(jobPaths.script).write( - s"""#!/bin/sh 
- |( - | cd $cwd - | $instantiatedCommand - |) - |echo $$? > $rcTmpPath - |mv $rcTmpPath $rcPath""".stripMargin) + val scriptBody = s""" + +#!/bin/sh +( + cd $cwd + $instantiatedCommand +) +echo $$? > $rcTmpPath +mv $rcTmpPath $rcPath + +""".trim + "\n" + + File(jobPaths.script).write(scriptBody) } /** @@ -313,7 +328,7 @@ trait SharedFileSystemAsyncJobExecutionActor def processReturnCode()(implicit ec: ExecutionContext): Future[ExecutionHandle] = { val returnCodeTry = Try(File(jobPaths.returnCode).contentAsString.stripLineEnd.toInt) - lazy val badReturnCodeMessage = s"Call ${call.fullyQualifiedName}: return code was ${returnCodeTry.getOrElse("(none)")}" + lazy val badReturnCodeMessage = s"Call ${jobDescriptor.key.tag}: return code was ${returnCodeTry.getOrElse("(none)")}" lazy val badReturnCodeResponse = Future.successful( FailedNonRetryableExecutionHandle(new Exception(badReturnCodeMessage), returnCodeTry.toOption)) @@ -323,7 +338,7 @@ trait SharedFileSystemAsyncJobExecutionActor def processSuccess(returnCode: Int) = { val successfulFuture = for { outputs <- Future.fromTry(processOutputs()) - } yield SuccessfulExecutionHandle(outputs, returnCode, jobPaths.detritusPaths.mapValues(_.toString), Seq.empty) + } yield SuccessfulExecutionHandle(outputs, returnCode, jobPaths.detritusPaths, Seq.empty) successfulFuture recover { case failed: Throwable => @@ -371,7 +386,7 @@ trait SharedFileSystemAsyncJobExecutionActor } } - private def processOutputs(): Try[JobOutputs] = { + private def processOutputs(): Try[CallOutputs] = { OutputEvaluator.evaluateOutputs(jobDescriptor, callEngineFunction, sharedFileSystem.outputMapper(jobPaths)) } } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala index edacd5303..1ac3093dc 100644 --- 
a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemBackendLifecycleActorFactory.scala @@ -1,11 +1,16 @@ package cromwell.backend.sfs import akka.actor.{ActorRef, Props} +import cats.data.Validated.{Invalid, Valid} import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionResponse import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor, BackendJobDescriptorKey, BackendLifecycleActorFactory, BackendWorkflowDescriptor} import cromwell.core.Dispatcher import cromwell.core.Dispatcher._ -import wdl4s.Call +import cromwell.core.path.{DefaultPathBuilderFactory, PathBuilderFactory} +import cromwell.filesystems.gcs.{GcsPathBuilderFactory, GoogleConfiguration} +import lenthall.exception.MessageAggregation +import net.ceedubs.ficus.Ficus._ +import wdl4s.TaskCall import wdl4s.expression.WdlStandardLibraryFunctions import scala.concurrent.Promise @@ -18,6 +23,23 @@ import scala.concurrent.Promise trait SharedFileSystemBackendLifecycleActorFactory extends BackendLifecycleActorFactory { /** + * If the backend sets a gcs authentication mode, try to create a PathBuilderFactory with it. 
+ */ + lazy val gcsPathBuilderFactory: Option[GcsPathBuilderFactory] = { + configurationDescriptor.backendConfig.as[Option[String]]("filesystems.gcs.auth") map { configAuth => + GoogleConfiguration(configurationDescriptor.globalConfig).auth(configAuth) match { + case Valid(auth) => GcsPathBuilderFactory(auth) + case Invalid(error) => throw new MessageAggregation { + override def exceptionContext: String = "Failed to parse gcs auth configuration" + override def errorMessages: Traversable[String] = error.toList + } + } + } + } + + lazy val pathBuilderFactories: List[PathBuilderFactory] = List(gcsPathBuilderFactory, Option(DefaultPathBuilderFactory)).flatten + + /** * Config values for the backend, and a pointer to the global config. * * This is the single parameter passed into each factory during creation. @@ -41,10 +63,10 @@ trait SharedFileSystemBackendLifecycleActorFactory extends BackendLifecycleActor */ def asyncJobExecutionActorClass: Class[_ <: SharedFileSystemAsyncJobExecutionActor] - override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], + override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], serviceRegistryActor: ActorRef) = { val params = SharedFileSystemInitializationActorParams(serviceRegistryActor, workflowDescriptor, - configurationDescriptor, calls) + configurationDescriptor, calls, pathBuilderFactories) Option(Props(initializationActorClass, params).withDispatcher(Dispatcher.BackendDispatcher)) } diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala index 62dc98b07..6ba44914d 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala +++ 
b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemCacheHitCopyingActor.scala @@ -1,10 +1,13 @@ package cromwell.backend.sfs -import java.nio.file.{Path, Paths} +import java.nio.file.Path import akka.actor.ActorRef import cromwell.backend.callcaching.CacheHitDuplicating import cromwell.backend.{BackendCacheHitCopyingActor, BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptor} +import cromwell.core.path.PathFactory + +import scala.util.Try class SharedFileSystemCacheHitCopyingActor(override val jobDescriptor: BackendJobDescriptor, override val configurationDescriptor: BackendConfigurationDescriptor, @@ -17,7 +20,7 @@ class SharedFileSystemCacheHitCopyingActor(override val jobDescriptor: BackendJo override lazy val destinationJobDetritusPaths = jobPaths.detritusPaths - override protected def getPath(file: String) = Paths.get(file) + override protected def getPath(file: String) = Try(PathFactory.buildPath(file, jobPaths.pathBuilders)) override protected def duplicate(source: Path, destination: Path) = { // -Ywarn-value-discard diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemExpressionFunctions.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemExpressionFunctions.scala index 1f73cac38..7dc5172ba 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemExpressionFunctions.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemExpressionFunctions.scala @@ -1,12 +1,13 @@ package cromwell.backend.sfs -import java.nio.file.{FileSystem, Path} +import java.nio.file.Path -import cromwell.backend.io.{JobPaths, WorkflowPathsBackendInitializationData} +import cromwell.backend.io.{JobPaths, JobPathsWithDocker, WorkflowPathsBackendInitializationData} import cromwell.backend.wdl._ import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendJobDescriptorKey, 
BackendWorkflowDescriptor} import cromwell.core.CallContext -import wdl4s.expression.WdlStandardLibraryFunctions +import cromwell.core.path.PathBuilder +import wdl4s.expression.PureStandardLibraryFunctionsLike import wdl4s.values.{WdlFile, WdlValue} import scala.language.postfixOps @@ -20,49 +21,50 @@ object SharedFileSystemExpressionFunctions { def apply(workflowDescriptor: BackendWorkflowDescriptor, jobKey: BackendJobDescriptorKey, configurationDescriptor: BackendConfigurationDescriptor, - fileSystems: List[FileSystem]): SharedFileSystemExpressionFunctions = { - val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobKey) + pathBuilders: List[PathBuilder]): SharedFileSystemExpressionFunctions = { + val jobPaths = new JobPathsWithDocker(jobKey, workflowDescriptor, configurationDescriptor.backendConfig) val callContext = CallContext( jobPaths.callExecutionRoot, jobPaths.stdout.toString, jobPaths.stderr.toString ) - new SharedFileSystemExpressionFunctions(fileSystems, callContext) + new SharedFileSystemExpressionFunctions(pathBuilders, callContext) } - def apply(jobPaths: JobPaths, fileSystems: List[FileSystem]): SharedFileSystemExpressionFunctions = { + def apply(jobPaths: JobPaths, pathBuilders: List[PathBuilder]): SharedFileSystemExpressionFunctions = { val callContext = CallContext( jobPaths.callExecutionRoot, jobPaths.stdout.toString, jobPaths.stderr.toString ) - new SharedFileSystemExpressionFunctions(fileSystems, callContext) + new SharedFileSystemExpressionFunctions(pathBuilders, callContext) } def apply(workflowDescriptor: BackendWorkflowDescriptor, configurationDescriptor: BackendConfigurationDescriptor, jobKey: BackendJobDescriptorKey, initializationData: Option[BackendInitializationData]) = { - val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobKey) + val jobPaths = new JobPathsWithDocker(jobKey, workflowDescriptor, configurationDescriptor.backendConfig) val callContext = 
CallContext( jobPaths.callExecutionRoot, jobPaths.stdout.toString, jobPaths.stderr.toString ) - new SharedFileSystemExpressionFunctions(WorkflowPathsBackendInitializationData.fileSystems(initializationData), callContext) + new SharedFileSystemExpressionFunctions(WorkflowPathsBackendInitializationData.pathBuilders(initializationData), callContext) } } -class SharedFileSystemExpressionFunctions(override val fileSystems: List[FileSystem], +class SharedFileSystemExpressionFunctions(override val pathBuilders: List[PathBuilder], context: CallContext - ) extends WdlStandardLibraryFunctions with PureFunctions with ReadLikeFunctions with WriteFunctions { + ) extends PureStandardLibraryFunctionsLike with ReadLikeFunctions with WriteFunctions { import SharedFileSystemExpressionFunctions._ import better.files._ + override def writeTempFile(path: String, prefix: String, suffix: String, content: String): String = super[WriteFunctions].writeTempFile(path, prefix, suffix, content) override def globPath(glob: String) = context.root.toString override def glob(path: String, pattern: String): Seq[String] = { - File(toPath(path)).glob(s"**/$pattern") map { _.pathAsString } toSeq + File(context.root).glob(s"**/$pattern") map { _.pathAsString } toSeq } override val writeDirectory = context.root diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala index 54a370b39..52c8fbeaf 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemInitializationActor.scala @@ -4,10 +4,11 @@ import akka.actor.ActorRef import better.files._ import cromwell.backend.io.{WorkflowPaths, WorkflowPathsBackendInitializationData} import cromwell.backend.validation.RuntimeAttributesDefault -import 
cromwell.backend.wfs.{DefaultWorkflowFileSystemProvider, WorkflowFileSystemProvider} +import cromwell.backend.wfs.WorkflowPathBuilder import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor, BackendWorkflowInitializationActor} -import cromwell.core.{Dispatcher, WorkflowOptions} -import wdl4s.Call +import cromwell.core.WorkflowOptions +import cromwell.core.path.PathBuilderFactory +import wdl4s.TaskCall import wdl4s.values.WdlValue import scala.concurrent.Future @@ -18,7 +19,8 @@ case class SharedFileSystemInitializationActorParams serviceRegistryActor: ActorRef, workflowDescriptor: BackendWorkflowDescriptor, configurationDescriptor: BackendConfigurationDescriptor, - calls: Seq[Call] + calls: Set[TaskCall], + pathBuilderFactories: List[PathBuilderFactory] ) class SharedFileSystemBackendInitializationData @@ -37,7 +39,7 @@ class SharedFileSystemInitializationActor(params: SharedFileSystemInitialization override lazy val workflowDescriptor: BackendWorkflowDescriptor = params.workflowDescriptor override lazy val configurationDescriptor: BackendConfigurationDescriptor = params.configurationDescriptor - override lazy val calls: Seq[Call] = params.calls + override lazy val calls: Set[TaskCall] = params.calls override lazy val serviceRegistryActor: ActorRef = params.serviceRegistryActor def runtimeAttributesBuilder: SharedFileSystemValidatedRuntimeAttributesBuilder = @@ -49,11 +51,9 @@ class SharedFileSystemInitializationActor(params: SharedFileSystemInitialization ).toMap } - val providers = Seq(GcsWorkflowFileSystemProvider, DefaultWorkflowFileSystemProvider) - val ioDispatcher = context.system.dispatchers.lookup(Dispatcher.IoDispatcher) + val pathBuilders = params.pathBuilderFactories map { _.withOptions(workflowDescriptor.workflowOptions)(context.system) } - val workflowPaths = WorkflowFileSystemProvider.workflowPaths(configurationDescriptor, workflowDescriptor, - providers, ioDispatcher) + val workflowPaths = 
WorkflowPathBuilder.workflowPaths(configurationDescriptor, workflowDescriptor, pathBuilders) override def beforeAll(): Future[Option[BackendInitializationData]] = { Future.fromTry(Try { diff --git a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobCachingActorHelper.scala b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobCachingActorHelper.scala index d9d4f4213..54453ab75 100644 --- a/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobCachingActorHelper.scala +++ b/supportedBackends/sfs/src/main/scala/cromwell/backend/sfs/SharedFileSystemJobCachingActorHelper.scala @@ -4,7 +4,7 @@ import akka.actor.{Actor, ActorRef} import com.typesafe.config.{Config, ConfigFactory} import cromwell.backend.BackendInitializationData import cromwell.backend.callcaching.JobCachingActorHelper -import cromwell.backend.io.JobPaths +import cromwell.backend.io.{JobPathsWithDocker, WorkflowPathsBackendInitializationData} import cromwell.backend.validation.{RuntimeAttributesValidation, ValidatedRuntimeAttributes} import cromwell.core.logging.JobLogging import net.ceedubs.ficus.Ficus._ @@ -17,7 +17,7 @@ trait SharedFileSystemJobCachingActorHelper extends JobCachingActorHelper { def serviceRegistryActor: ActorRef lazy val jobPaths = - new JobPaths(jobDescriptor.workflowDescriptor, configurationDescriptor.backendConfig, jobDescriptor.key) + new JobPathsWithDocker(jobDescriptor.key, jobDescriptor.workflowDescriptor, configurationDescriptor.backendConfig) lazy val initializationData = BackendInitializationData. 
as[SharedFileSystemBackendInitializationData](backendInitializationDataOption) @@ -37,6 +37,7 @@ trait SharedFileSystemJobCachingActorHelper extends JobCachingActorHelper { } lazy val sharedFileSystem = new SharedFileSystem { + override val pathBuilders = WorkflowPathsBackendInitializationData.pathBuilders(backendInitializationDataOption) override lazy val sharedFileSystemConfig = { configurationDescriptor.backendConfig.as[Option[Config]]("filesystems.local").getOrElse(ConfigFactory.empty()) } diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala index 81edb6f60..bd25dfc51 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/impl/sfs/config/ConfigHashingStrategySpec.scala @@ -6,6 +6,9 @@ import akka.event.LoggingAdapter import better.files._ import com.typesafe.config.{ConfigFactory, ConfigValueFactory} import cromwell.backend.callcaching.FileHashingActor.SingleFileHashRequest +import cromwell.backend.io.WorkflowPaths +import cromwell.backend.sfs.SharedFileSystemBackendInitializationData +import cromwell.core.path.DefaultPathBuilder import org.apache.commons.codec.digest.DigestUtils import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} @@ -43,7 +46,14 @@ class ConfigHashingStrategySpec extends FlatSpec with Matchers with TableDrivenP symLink } else file + val workflowPaths = mock[WorkflowPaths] + workflowPaths.pathBuilders returns List(DefaultPathBuilder) + + val initData = mock[SharedFileSystemBackendInitializationData] + initData.workflowPaths returns workflowPaths + request.file returns WdlFile(requestFile.pathAsString) + request.initializationData returns Option(initData) request } diff --git 
a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala index 64dbfa9d7..1cbfd2cfe 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemInitializationActorSpec.scala @@ -8,7 +8,7 @@ import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescript import cromwell.core.TestKitSuite import cromwell.core.logging.LoggingTest._ import org.scalatest.{Matchers, WordSpecLike} -import wdl4s.Call +import wdl4s.TaskCall import scala.concurrent.duration._ @@ -30,14 +30,14 @@ class SharedFileSystemInitializationActorSpec extends TestKitSuite("SharedFileSy | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin - private def getActorRef(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], + private def getActorRef(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], conf: BackendConfigurationDescriptor) = { - val params = SharedFileSystemInitializationActorParams(emptyActor, workflowDescriptor, conf, calls) + val params = SharedFileSystemInitializationActorParams(emptyActor, workflowDescriptor, conf, calls, List.empty) val props = Props(new SharedFileSystemInitializationActor(params)) system.actorOf(props, "SharedFileSystemInitializationActor") } @@ -47,7 +47,7 @@ class SharedFileSystemInitializationActorSpec extends TestKitSuite("SharedFileSy within(Timeout) { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { unsupported: 1 }""") val conf = emptyBackendConfig - val backend = getActorRef(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, conf) + val backend = getActorRef(workflowDescriptor, workflowDescriptor.workflow.taskCalls, conf) val pattern = 
"Key/s [unsupported] is/are not supported by backend. " + "Unsupported attributes will not be part of jobs executions." EventFilter.warning(pattern = escapePattern(pattern), occurrences = 1) intercept { diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActorSpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActorSpec.scala index f697a7f4b..cd5735766 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActorSpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemJobExecutionActorSpec.scala @@ -5,17 +5,16 @@ import java.nio.file.{Files, Paths} import akka.testkit.TestDuration import better.files._ import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{AbortedResponse, JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend.BackendLifecycleActor.AbortJobCommand import cromwell.backend.io.TestWorkflows._ -import cromwell.backend.io.{JobPaths, TestWorkflows} +import cromwell.backend.io.{JobPathsWithDocker, TestWorkflows} import cromwell.backend.sfs.TestLocalAsyncJobExecutionActor._ import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobDescriptorKey, BackendSpec, RuntimeAttributeDefinition} import cromwell.core.Tags._ import cromwell.core._ import cromwell.services.keyvalue.KeyValueServiceActor.{KvJobKey, KvPair, ScopedKey} import org.scalatest.concurrent.PatienceConfiguration.Timeout -import org.scalatest.mockito.MockitoSugar import org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{FlatSpecLike, OptionValues} import wdl4s.types._ @@ -25,17 +24,17 @@ import wdl4s.values._ import scala.concurrent.duration._ class SharedFileSystemJobExecutionActorSpec extends 
TestKitSuite("SharedFileSystemJobExecutionActorSpec") - with FlatSpecLike with BackendSpec with MockitoSugar with TableDrivenPropertyChecks with OptionValues { + with FlatSpecLike with BackendSpec with TableDrivenPropertyChecks with OptionValues { behavior of "SharedFileSystemJobExecutionActor" lazy val runtimeAttributeDefinitions = SharedFileSystemValidatedRuntimeAttributesBuilder.default.definitions.toSet def executeSpec(docker: Boolean) = { - val expectedOutputs: JobOutputs = Map( + val expectedOutputs: CallOutputs = Map( "salutation" -> JobOutput(WdlString("Hello you !")) ) - val expectedResponse = SucceededResponse(mock[BackendJobDescriptorKey], Some(0), expectedOutputs, None, Seq.empty) + val expectedResponse = JobSucceededResponse(mock[BackendJobDescriptorKey], Some(0), expectedOutputs, None, Seq.empty) val runtime = if (docker) """runtime { docker: "ubuntu:latest" }""" else "" val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = runtime) val workflow = TestWorkflow(workflowDescriptor, emptyBackendConfig, expectedResponse) @@ -52,7 +51,7 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst } it should "send back an execution failure if the task fails" in { - val expectedResponse = FailedNonRetryableResponse(mock[BackendJobDescriptorKey], new Exception(""), Option(1)) + val expectedResponse = JobFailedNonRetryableResponse(mock[BackendJobDescriptorKey], new Exception(""), Option(1)) val workflow = TestWorkflow(buildWorkflowDescriptor(GoodbyeWorld), emptyBackendConfig, expectedResponse) val backend = createBackend(jobDescriptorFromSingleCallWorkflow(workflow.workflowDescriptor, Map.empty, WorkflowOptions.empty, runtimeAttributeDefinitions), workflow.config) testWorkflow(workflow, backend) @@ -82,11 +81,11 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst val jsonInputFile = createCannedFile("localize", "content from json inputs").pathAsString val callInputFile = 
createCannedFile("localize", "content from call inputs").pathAsString val inputs = Map( - "inputFileFromCallInputs" -> WdlFile(callInputFile), - "inputFileFromJson" -> WdlFile(jsonInputFile) + "wf_localize.localize.inputFileFromCallInputs" -> WdlFile(callInputFile), + "wf_localize.localize.inputFileFromJson" -> WdlFile(jsonInputFile) ) - val expectedOutputs: JobOutputs = Map( + val expectedOutputs: CallOutputs = Map( "out" -> JobOutput(WdlArray(WdlArrayType(WdlStringType), Array( WdlString("content from json inputs"), @@ -106,10 +105,10 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst val runtime = if (docker) """runtime { docker: "ubuntu:latest" } """ else "" val workflowDescriptor = buildWorkflowDescriptor(InputFiles, inputs, runtime = runtime) val backend = createBackend(jobDescriptorFromSingleCallWorkflow(workflowDescriptor, inputs, WorkflowOptions.empty, runtimeAttributeDefinitions), conf) - val jobDescriptor: BackendJobDescriptor = jobDescriptorFromSingleCallWorkflow(workflowDescriptor, Map.empty, WorkflowOptions.empty, runtimeAttributeDefinitions) - val expectedResponse = SucceededResponse(jobDescriptor.key, Some(0), expectedOutputs, None, Seq.empty) + val jobDescriptor: BackendJobDescriptor = jobDescriptorFromSingleCallWorkflow(workflowDescriptor, inputs, WorkflowOptions.empty, runtimeAttributeDefinitions) + val expectedResponse = JobSucceededResponse(jobDescriptor.key, Some(0), expectedOutputs, None, Seq.empty) - val jobPaths = new JobPaths(workflowDescriptor, conf.backendConfig, jobDescriptor.key) + val jobPaths = new JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, conf.backendConfig) whenReady(backend.execute) { executionResponse => assertResponse(executionResponse, expectedResponse) @@ -157,7 +156,7 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst val backendRef = createBackendRef(jobDescriptor, emptyBackendConfig) val backend = backendRef.underlyingActor - val jobPaths = 
new JobPaths(workflowDescriptor, ConfigFactory.empty, jobDescriptor.key) + val jobPaths = new JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, ConfigFactory.empty) File(jobPaths.callExecutionRoot).createDirectories() File(jobPaths.stdout).write("Hello stubby ! ") File(jobPaths.stderr).touch() @@ -189,13 +188,13 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst whenReady(execute, Timeout(10.seconds.dilated)) { executionResponse => if (writeReturnCode) { - executionResponse should be(a[SucceededResponse]) - val succeededResponse = executionResponse.asInstanceOf[SucceededResponse] + executionResponse should be(a[JobSucceededResponse]) + val succeededResponse = executionResponse.asInstanceOf[JobSucceededResponse] succeededResponse.returnCode.value should be(0) succeededResponse.jobOutputs should be(Map("salutation" -> JobOutput(WdlString("Hello stubby !")))) } else { - executionResponse should be(a[FailedNonRetryableResponse]) - val failedResponse = executionResponse.asInstanceOf[FailedNonRetryableResponse] + executionResponse should be(a[JobFailedNonRetryableResponse]) + val failedResponse = executionResponse.asInstanceOf[JobFailedNonRetryableResponse] failedResponse.returnCode should be(empty) failedResponse.throwable should be(a[RuntimeException]) failedResponse.throwable.getMessage should startWith("Unable to determine that 0 is alive, and") @@ -219,20 +218,20 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst it should "execute shards from a scatter" in { val workflowDescriptor = buildWorkflowDescriptor(TestWorkflows.Scatter) - val call = workflowDescriptor.workflowNamespace.workflow.calls.head + val call = workflowDescriptor.workflow.taskCalls.head 0 to 2 foreach { shard => // This assumes that engine will give us the evaluated value of the scatter item at the correct index // If this is not the case, more context/logic will need to be moved to the backend so it can figure it out by 
itself - val symbolMaps: Map[LocallyQualifiedName, WdlInteger] = Map("intNumber" -> WdlInteger(shard)) + val symbolMaps: Map[LocallyQualifiedName, WdlInteger] = Map("scattering.intNumber" -> WdlInteger(shard)) val runtimeAttributes = RuntimeAttributeDefinition.addDefaultsToAttributes(runtimeAttributeDefinitions, WorkflowOptions.empty)(call.task.runtimeAttributes.attrs) val jobDescriptor: BackendJobDescriptor = - BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, Option(shard), 1), runtimeAttributes, symbolMaps) + BackendJobDescriptor(workflowDescriptor, BackendJobDescriptorKey(call, Option(shard), 1), runtimeAttributes, fqnMapToDeclarationMap(symbolMaps)) val backend = createBackend(jobDescriptor, emptyBackendConfig) val response = - SucceededResponse(mock[BackendJobDescriptorKey], Some(0), Map("out" -> JobOutput(WdlInteger(shard))), None, Seq.empty) + JobSucceededResponse(mock[BackendJobDescriptorKey], Some(0), Map("out" -> JobOutput(WdlInteger(shard))), None, Seq.empty) executeJobAndAssertOutputs(backend, response) } } @@ -240,12 +239,12 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst it should "post process outputs" in { val inputFile = createCannedFile("localize", "content from json inputs").pathAsString val inputs = Map { - "inputFile" -> WdlFile(inputFile) + "wf_localize.localize.inputFile" -> WdlFile(inputFile) } val workflowDescriptor = buildWorkflowDescriptor(OutputProcess, inputs) val jobDescriptor: BackendJobDescriptor = jobDescriptorFromSingleCallWorkflow(workflowDescriptor, inputs, WorkflowOptions.empty, runtimeAttributeDefinitions) val backend = createBackend(jobDescriptor, emptyBackendConfig) - val jobPaths = new JobPaths(workflowDescriptor, emptyBackendConfig.backendConfig, jobDescriptor.key) + val jobPaths = new JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, emptyBackendConfig.backendConfig) val expectedA = WdlFile(jobPaths.callExecutionRoot.resolve("a").toAbsolutePath.toString) val 
expectedB = WdlFile(jobPaths.callExecutionRoot.resolve("dir").toAbsolutePath.resolve("b").toString) val expectedOutputs = Map( @@ -253,13 +252,13 @@ class SharedFileSystemJobExecutionActorSpec extends TestKitSuite("SharedFileSyst "o2" -> JobOutput(WdlArray(WdlArrayType(WdlFileType), Seq(expectedA, expectedB))), "o3" -> JobOutput(WdlFile(inputFile)) ) - val expectedResponse = SucceededResponse(jobDescriptor.key, Some(0), expectedOutputs, None, Seq.empty) + val expectedResponse = JobSucceededResponse(jobDescriptor.key, Some(0), expectedOutputs, None, Seq.empty) executeJobAndAssertOutputs(backend, expectedResponse) } - it should "fail post processing if an output fail is not found" in { - val expectedResponse = FailedNonRetryableResponse(mock[BackendJobDescriptorKey], + it should "fail post processing if an output file is not found" in { + val expectedResponse = JobFailedNonRetryableResponse(mock[BackendJobDescriptorKey], AggregatedException(Seq.empty, "Could not process output, file not found"), Option(0)) val workflow = TestWorkflow(buildWorkflowDescriptor(MissingOutputProcess), emptyBackendConfig, expectedResponse) val backend = createBackend(jobDescriptorFromSingleCallWorkflow(workflow.workflowDescriptor, Map.empty, WorkflowOptions.empty, runtimeAttributeDefinitions), workflow.config) diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala index c3874a14a..d7c39924c 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/SharedFileSystemSpec.scala @@ -1,22 +1,24 @@ package cromwell.backend.sfs -import java.nio.file.{FileSystems, Files} +import java.nio.file.Files import better.files._ import com.typesafe.config.{Config, ConfigFactory} +import cromwell.core.path.DefaultPathBuilder +import cromwell.backend.BackendSpec import 
org.scalatest.prop.TableDrivenPropertyChecks import org.scalatest.{FlatSpec, Matchers} import org.specs2.mock.Mockito import wdl4s.values.WdlFile -class SharedFileSystemSpec extends FlatSpec with Matchers with Mockito with TableDrivenPropertyChecks { +class SharedFileSystemSpec extends FlatSpec with Matchers with Mockito with TableDrivenPropertyChecks with BackendSpec { behavior of "SharedFileSystem" val defaultLocalization = ConfigFactory.parseString(""" localization: [copy, hard-link, soft-link] """) val hardLinkLocalization = ConfigFactory.parseString(""" localization: [hard-link] """) val softLinkLocalization = ConfigFactory.parseString(""" localization: [soft-link] """) - val localFS = List(FileSystems.getDefault) + val localPathBuilder = List(DefaultPathBuilder) def localizationTest(config: Config, @@ -34,12 +36,16 @@ class SharedFileSystemSpec extends FlatSpec with Matchers with Mockito with Tabl dest.touch() } - val inputs = Map("input" -> WdlFile(orig.pathAsString)) - val sharedFS = new SharedFileSystem { override val sharedFileSystemConfig = config } - val result = sharedFS.localizeInputs(callDir.path, docker = docker, localFS, inputs) + val inputs = fqnMapToDeclarationMap(Map("input" -> WdlFile(orig.pathAsString))) + val sharedFS = new SharedFileSystem { + override val pathBuilders = localPathBuilder + override val sharedFileSystemConfig = config + } + val localizedinputs = Map(inputs.head._1 -> WdlFile(dest.pathAsString)) + val result = sharedFS.localizeInputs(callDir.path, docker = docker)(inputs) result.isSuccess shouldBe true - result.get should contain theSameElementsAs Map("input" -> WdlFile(dest.pathAsString)) + result.get should contain theSameElementsAs localizedinputs dest.exists shouldBe true countLinks(dest) should be(linkNb) diff --git a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala index 
0ae091367..72d2c6e99 100644 --- a/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala +++ b/supportedBackends/sfs/src/test/scala/cromwell/backend/sfs/TestLocalAsyncJobExecutionActor.scala @@ -3,7 +3,7 @@ package cromwell.backend.sfs import akka.actor.{ActorSystem, Props} import akka.testkit.TestActorRef import cromwell.backend.BackendJobExecutionActor.BackendJobExecutionResponse -import cromwell.backend.io.WorkflowPaths +import cromwell.backend.io.WorkflowPathsWithDocker import cromwell.backend.validation.{DockerValidation, RuntimeAttributesValidation} import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor} @@ -34,7 +34,7 @@ object TestLocalAsyncJobExecutionActor { def createBackendRef(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor) (implicit system: ActorSystem): TestActorRef[SharedFileSystemJobExecutionActor] = { val emptyActor = system.actorOf(Props.empty) - val workflowPaths = new WorkflowPaths(jobDescriptor.workflowDescriptor, configurationDescriptor.backendConfig) + val workflowPaths = new WorkflowPathsWithDocker(jobDescriptor.workflowDescriptor, configurationDescriptor.backendConfig) val initializationData = new SharedFileSystemBackendInitializationData(workflowPaths, SharedFileSystemValidatedRuntimeAttributesBuilder.default.withValidation(DockerValidation.optional)) diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala index ef01c9985..6fd75323e 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkBackendFactory.scala @@ -2,14 +2,14 @@ package cromwell.backend.impl.spark import akka.actor.{ActorRef, ActorSystem, Props} import cromwell.backend._ +import 
cromwell.backend.io.JobPathsWithDocker import cromwell.backend.sfs.SharedFileSystemExpressionFunctions -import cromwell.backend.io.JobPaths import cromwell.core.CallContext -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.expression.WdlStandardLibraryFunctions case class SparkBackendFactory(name: String, configurationDescriptor: BackendConfigurationDescriptor, actorSystem: ActorSystem) extends BackendLifecycleActorFactory { - override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], serviceRegistryActor: ActorRef): Option[Props] = { + override def workflowInitializationActorProps(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], serviceRegistryActor: ActorRef): Option[Props] = { Option(SparkInitializationActor.props(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) } @@ -22,13 +22,13 @@ case class SparkBackendFactory(name: String, configurationDescriptor: BackendCon override def expressionLanguageFunctions(workflowDescriptor: BackendWorkflowDescriptor, jobKey: BackendJobDescriptorKey, initializationData: Option[BackendInitializationData]): WdlStandardLibraryFunctions = { - val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobKey) + val jobPaths = new JobPathsWithDocker(jobKey, workflowDescriptor, configurationDescriptor.backendConfig) val callContext = new CallContext( jobPaths.callExecutionRoot, jobPaths.stdout.toAbsolutePath.toString, jobPaths.stderr.toAbsolutePath.toString ) - new SharedFileSystemExpressionFunctions(SparkJobExecutionActor.DefaultFileSystems, callContext) + new SharedFileSystemExpressionFunctions(SparkJobExecutionActor.DefaultPathBuilders, callContext) } } diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkInitializationActor.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkInitializationActor.scala index 2c4b5f94f..1ff66aa24 100644 --- 
a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkInitializationActor.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkInitializationActor.scala @@ -6,7 +6,7 @@ import cromwell.backend.validation.RuntimeAttributesDefault import cromwell.backend.validation.RuntimeAttributesKeys._ import cromwell.backend.{BackendConfigurationDescriptor, BackendInitializationData, BackendWorkflowDescriptor, BackendWorkflowInitializationActor} import cromwell.core.WorkflowOptions -import wdl4s.Call +import wdl4s.TaskCall import wdl4s.types.{WdlBooleanType, WdlIntegerType, WdlStringType} import wdl4s.values.WdlValue @@ -18,14 +18,14 @@ object SparkInitializationActor { SparkRuntimeAttributes.NumberOfExecutorsKey, SparkRuntimeAttributes.AppMainClassKey) def props(workflowDescriptor: BackendWorkflowDescriptor, - calls: Seq[Call], + calls: Set[TaskCall], configurationDescriptor: BackendConfigurationDescriptor, serviceRegistryActor: ActorRef): Props = Props(new SparkInitializationActor(workflowDescriptor, calls, configurationDescriptor, serviceRegistryActor)) } class SparkInitializationActor(override val workflowDescriptor: BackendWorkflowDescriptor, - override val calls: Seq[Call], + override val calls: Set[TaskCall], override val configurationDescriptor: BackendConfigurationDescriptor, override val serviceRegistryActor: ActorRef) extends BackendWorkflowInitializationActor { diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkJobExecutionActor.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkJobExecutionActor.scala index c793338eb..927ac7d45 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkJobExecutionActor.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkJobExecutionActor.scala @@ -1,15 +1,16 @@ package cromwell.backend.impl.spark -import java.nio.file.FileSystems import 
java.nio.file.attribute.PosixFilePermission import akka.actor.Props -import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{BackendJobExecutionResponse, JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend.impl.spark.SparkClusterProcess._ -import cromwell.backend.io.JobPaths +import cromwell.backend.io.JobPathsWithDocker import cromwell.backend.sfs.{SharedFileSystem, SharedFileSystemExpressionFunctions} +import cromwell.backend.wdl.Command import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendJobExecutionActor} -import cromwell.core.{TailedWriter, UntailedWriter} +import cromwell.core.path.JavaWriterImplicits._ +import cromwell.core.path.{DefaultPathBuilder, TailedWriter, UntailedWriter} import wdl4s.parser.MemoryUnit import wdl4s.util.TryUtil @@ -18,7 +19,7 @@ import scala.sys.process.ProcessLogger import scala.util.{Failure, Success, Try} object SparkJobExecutionActor { - val DefaultFileSystems = List(FileSystems.getDefault) + val DefaultPathBuilders = List(DefaultPathBuilder) def props(jobDescriptor: BackendJobDescriptor, configurationDescriptor: BackendConfigurationDescriptor): Props = Props(new SparkJobExecutionActor(jobDescriptor, configurationDescriptor)) @@ -29,8 +30,8 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, import SparkJobExecutionActor._ import better.files._ - import cromwell.core.PathFactory._ + override val pathBuilders = DefaultPathBuilders private val tag = s"SparkJobExecutionActor-${jobDescriptor.key.tag}:" lazy val cmds = new SparkCommands @@ -43,7 +44,7 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, private val sparkDeployMode = configurationDescriptor.backendConfig.getString("deployMode").toLowerCase override val sharedFileSystemConfig = fileSystemsConfig.getConfig("local") private val 
workflowDescriptor = jobDescriptor.workflowDescriptor - private val jobPaths = new JobPaths(workflowDescriptor, configurationDescriptor.backendConfig, jobDescriptor.key) + private val jobPaths = new JobPathsWithDocker(jobDescriptor.key, workflowDescriptor, configurationDescriptor.backendConfig) // Files private val executionDir = jobPaths.callExecutionRoot @@ -58,9 +59,9 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, private lazy val isClusterMode = isSparkClusterMode(sparkDeployMode, sparkMaster) private val call = jobDescriptor.key.call - private val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, DefaultFileSystems) + private val callEngineFunction = SharedFileSystemExpressionFunctions(jobPaths, DefaultPathBuilders) - private val lookup = jobDescriptor.inputs.apply _ + private val lookup = jobDescriptor.fullyQualifiedInputs.apply _ private val executionResponse = Promise[BackendJobExecutionResponse]() @@ -107,12 +108,12 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, private def resolveExecutionResult(jobReturnCode: Try[Int], failedOnStderr: Boolean): Future[BackendJobExecutionResponse] = { (jobReturnCode, failedOnStderr) match { case (Success(0), true) if File(jobPaths.stderr).lines.toList.nonEmpty => - Future.successful(FailedNonRetryableResponse(jobDescriptor.key, + Future.successful(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException(s"Execution process failed although return code is zero but stderr is not empty"), Option(0))) case (Success(0), _) => resolveExecutionProcess - case (Success(rc), _) => Future.successful(FailedNonRetryableResponse(jobDescriptor.key, + case (Success(rc), _) => Future.successful(JobFailedNonRetryableResponse(jobDescriptor.key, new IllegalStateException(s"Execution process failed. 
Spark returned non zero status code: $rc"), Option(rc))) - case (Failure(error), _) => Future.successful(FailedNonRetryableResponse(jobDescriptor.key, error, None)) + case (Failure(error), _) => Future.successful(JobFailedNonRetryableResponse(jobDescriptor.key, error, None)) } } @@ -122,9 +123,9 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, case true => clusterExtProcess.startMonitoringSparkClusterJob(jobPaths.callExecutionRoot, SubmitJobJson.format(sparkDeployMode)) collect { case Finished => processSuccess(0) - case Failed(error: Throwable) => FailedNonRetryableResponse(jobDescriptor.key, error, None) + case Failed(error: Throwable) => JobFailedNonRetryableResponse(jobDescriptor.key, error, None) } recover { - case error: Throwable => FailedNonRetryableResponse(jobDescriptor.key, error, None) + case error: Throwable => JobFailedNonRetryableResponse(jobDescriptor.key, error, None) } case false => Future.successful(processSuccess(0)) } @@ -132,12 +133,12 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, private def processSuccess(rc: Int) = { evaluateOutputs(callEngineFunction, outputMapper(jobPaths)) match { - case Success(outputs) => SucceededResponse(jobDescriptor.key, Some(rc), outputs, None, Seq.empty) + case Success(outputs) => JobSucceededResponse(jobDescriptor.key, Some(rc), outputs, None, Seq.empty) case Failure(e) => val message = Option(e.getMessage) map { ": " + _ } getOrElse "" - FailedNonRetryableResponse(jobDescriptor.key, new Throwable("Failed post processing of outputs" + message, e), Option(rc)) + JobFailedNonRetryableResponse(jobDescriptor.key, new Throwable("Failed post processing of outputs" + message, e), Option(rc)) } } @@ -155,9 +156,12 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, executionDir.toString.toFile.createIfNotExists(asDirectory = true, createParents = true) log.debug("{} Resolving job command", tag) - val command = 
localizeInputs(jobPaths.callInputsRoot, docker = false, DefaultFileSystems, jobDescriptor.inputs) flatMap { - localizedInputs => call.task.instantiateCommand(localizedInputs, callEngineFunction, identity) - } + + val command = Command.instantiate( + jobDescriptor, + callEngineFunction, + localizeInputs(jobPaths.callInputsRoot, docker = false) + ) log.debug("{} Creating bash script for executing command: {}", tag, command) // TODO: we should use shapeless Heterogeneous list here not good to have generic map @@ -202,7 +206,7 @@ class SparkJobExecutionActor(override val jobDescriptor: BackendJobDescriptor, case false => executionResponse completeWith executeTask(extProcess, stdoutWriter, stderrWriter) } } recover { - case exception => executionResponse success FailedNonRetryableResponse(jobDescriptor.key, exception, None) + case exception => executionResponse success JobFailedNonRetryableResponse(jobDescriptor.key, exception, None) } } diff --git a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkProcess.scala b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkProcess.scala index 60f399218..01d221161 100644 --- a/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkProcess.scala +++ b/supportedBackends/spark/src/main/scala/cromwell/backend/impl/spark/SparkProcess.scala @@ -2,12 +2,12 @@ package cromwell.backend.impl.spark import java.nio.file.Path +import better.files._ import com.typesafe.scalalogging.StrictLogging -import cromwell.core.{TailedWriter, UntailedWriter} -import cromwell.core.PathFactory.EnhancedPath +import cromwell.core.path.PathImplicits._ +import cromwell.core.path.{TailedWriter, UntailedWriter} import scala.sys.process._ -import better.files._ import scala.util.{Failure, Success, Try} object SparkCommands { @@ -29,12 +29,17 @@ class SparkCommands extends StrictLogging { * as some extra shell code for monitoring jobs */ def writeScript(instantiatedCommand: String, filePath: Path, 
containerRoot: Path) = { - File(filePath).write( - s"""#!/bin/sh - |cd $containerRoot - |$instantiatedCommand - |echo $$? > rc - |""".stripMargin) + + val scriptBody = + s""" + +#!/bin/sh +cd $containerRoot +$instantiatedCommand +echo $$? > rc + + """.trim + "\n" + File(filePath).write(scriptBody) } def sparkSubmitCommand(attributes: Map[String, Any]): String = { diff --git a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkInitializationActorSpec.scala b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkInitializationActorSpec.scala index f8aba1a4b..de58b7c06 100644 --- a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkInitializationActorSpec.scala +++ b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkInitializationActorSpec.scala @@ -7,6 +7,7 @@ import cromwell.backend.{BackendConfigurationDescriptor, BackendWorkflowDescript import cromwell.core.TestKitSuite import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike} import wdl4s._ + import scala.concurrent.duration._ class SparkInitializationActorSpec extends TestKitSuite("SparkInitializationActorSpec") @@ -27,12 +28,12 @@ class SparkInitializationActorSpec extends TestKitSuite("SparkInitializationAc | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin - private def getSparkBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Seq[Call], conf: BackendConfigurationDescriptor) = { + private def getSparkBackend(workflowDescriptor: BackendWorkflowDescriptor, calls: Set[TaskCall], conf: BackendConfigurationDescriptor) = { system.actorOf(SparkInitializationActor.props(workflowDescriptor, calls, conf, emptyActor)) } @@ -41,7 +42,7 @@ class SparkInitializationActorSpec extends TestKitSuite("SparkInitializationAc within(Timeout) { EventFilter.warning(message = s"Key/s [memory] is/are not supported by SparkBackend. 
Unsupported attributes will not be part of jobs executions.", occurrences = 1) intercept { val workflowDescriptor = buildWorkflowDescriptor(HelloWorld, runtime = """runtime { memory: 1 %s: "%s"}""".format("appMainClass", "test")) - val backend = getSparkBackend(workflowDescriptor, workflowDescriptor.workflowNamespace.workflow.calls, emptyBackendConfig) + val backend = getSparkBackend(workflowDescriptor, workflowDescriptor.workflow.taskCalls, emptyBackendConfig) backend ! Initialize } } diff --git a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkJobExecutionActorSpec.scala b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkJobExecutionActorSpec.scala index aa4400ac8..3a7873fc5 100644 --- a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkJobExecutionActorSpec.scala +++ b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkJobExecutionActorSpec.scala @@ -6,12 +6,12 @@ import java.nio.file.Path import akka.testkit.{ImplicitSender, TestActorRef} import better.files._ import com.typesafe.config.ConfigFactory -import cromwell.backend.BackendJobExecutionActor.{FailedNonRetryableResponse, SucceededResponse} +import cromwell.backend.BackendJobExecutionActor.{JobFailedNonRetryableResponse, JobSucceededResponse} import cromwell.backend.impl.spark.SparkClusterProcess._ import cromwell.backend.io._ import cromwell.backend.{BackendConfigurationDescriptor, BackendJobDescriptor, BackendSpec} -import cromwell.core.{PathWriter, TailedWriter, TestKitSuite, UntailedWriter, _} -import org.mockito.Matchers._ +import cromwell.core.{TestKitSuite, WorkflowOptions} +import cromwell.core.path.{PathWriter, TailedWriter, UntailedWriter} import org.mockito.Mockito import org.mockito.Mockito._ import org.scalatest.concurrent.PatienceConfiguration.Timeout @@ -50,7 +50,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") | RUNTIME |} | - |workflow hello { + |workflow wf_hello 
{ | call hello |} """.stripMargin @@ -68,7 +68,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") | RUNTIME |} | - |workflow helloClusterMode { + |workflow wf_helloClusterMode { | call helloClusterMode |} """.stripMargin @@ -171,7 +171,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.startMonitoringSparkClusterJob(any[Path], any[String])).thenReturn(Future.successful(Finished)) whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(sparkClusterProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkClusterProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkClusterProcess, times(1)).untailedWriter(any[Path]) @@ -201,8 +201,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.startMonitoringSparkClusterJob(any[Path], any[String])).thenReturn(Future.successful(Failed(new Throwable("failed to monitor")))) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains("failed to monitor")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains("failed to monitor")) verify(sparkClusterProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkClusterProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkClusterProcess, times(1)).untailedWriter(any[Path]) @@ -232,8 +232,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.startMonitoringSparkClusterJob(any[Path], any[String])).thenReturn(Future.failed(new IllegalStateException("failed to start monitoring process"))) whenReady(backend.execute, 
timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains("failed to start monitoring process")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains("failed to start monitoring process")) verify(sparkClusterProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkClusterProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkClusterProcess, times(1)).untailedWriter(any[Path]) @@ -263,8 +263,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.processStderr).thenReturn(sampleSubmissionResponse) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed although return code is zero but stderr is not empty")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed although return code is zero but stderr is not empty")) verify(sparkClusterProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkClusterProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkClusterProcess, times(1)).untailedWriter(any[Path]) @@ -292,8 +292,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.processStderr).thenReturn(stderrResult) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed. 
Spark returned non zero status code:")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed. Spark returned non zero status code:")) } cleanUpJob(jobPaths) } @@ -318,8 +318,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkClusterProcess.processStderr).thenReturn(stderrResult) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"submit job process exitValue method failed")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"submit job process exitValue method failed")) } cleanUpJob(jobPaths) } @@ -347,7 +347,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") }).underlyingActor whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(sparkProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkProcess, times(1)).untailedWriter(any[Path]) @@ -376,8 +376,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkProcess.processStderr).thenReturn(stderrResult) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed. Spark returned non zero status code:")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed. 
Spark returned non zero status code:")) } cleanUpJob(jobPaths) @@ -402,8 +402,8 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) whenReady(backend.execute, timeout) { response => - response shouldBe a[FailedNonRetryableResponse] - assert(response.asInstanceOf[FailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed although return code is zero but stderr is not empty")) + response shouldBe a[JobFailedNonRetryableResponse] + assert(response.asInstanceOf[JobFailedNonRetryableResponse].throwable.getMessage.contains(s"Execution process failed although return code is zero but stderr is not empty")) } cleanUpJob(jobPaths) @@ -427,7 +427,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") when(sparkProcess.untailedWriter(any[Path])).thenReturn(stubUntailed) whenReady(backend.execute, timeout) { response => - response shouldBe a[SucceededResponse] + response shouldBe a[JobSucceededResponse] verify(sparkProcess, times(1)).externalProcess(any[Seq[String]], any[ProcessLogger]) verify(sparkProcess, times(1)).tailedWriter(any[Int], any[Path]) verify(sparkProcess, times(1)).untailedWriter(any[Path]) @@ -438,7 +438,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") } - private def cleanUpJob(jobPaths: JobPaths): Unit = { + private def cleanUpJob(jobPaths: JobPathsWithDocker): Unit = { File(jobPaths.workflowRoot).delete(true) () } @@ -447,7 +447,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") val backendWorkflowDescriptor = buildWorkflowDescriptor(wdl = wdlSource, inputs = inputFiles.getOrElse(Map.empty), runtime = runtimeString) val backendConfigurationDescriptor = if (isCluster) BackendConfigurationDescriptor(backendClusterConfig, ConfigFactory.load) else BackendConfigurationDescriptor(backendClientConfig, ConfigFactory.load) val jobDesc = 
jobDescriptorFromSingleCallWorkflow(backendWorkflowDescriptor, inputFiles.getOrElse(Map.empty), WorkflowOptions.empty, Set.empty) - val jobPaths = if (isCluster) new JobPaths(backendWorkflowDescriptor, backendClusterConfig, jobDesc.key) else new JobPaths(backendWorkflowDescriptor, backendClientConfig, jobDesc.key) + val jobPaths = if (isCluster) new JobPathsWithDocker(jobDesc.key, backendWorkflowDescriptor, backendClusterConfig) else new JobPathsWithDocker(jobDesc.key, backendWorkflowDescriptor, backendClientConfig) val executionDir = jobPaths.callExecutionRoot val stdout = File(executionDir.toString, "stdout") stdout.createIfNotExists(asDirectory = false, createParents = true) @@ -456,7 +456,7 @@ class SparkJobExecutionActorSpec extends TestKitSuite("SparkJobExecutionActor") TestJobDescriptor(jobDesc, jobPaths, backendConfigurationDescriptor) } - private case class TestJobDescriptor(jobDescriptor: BackendJobDescriptor, jobPaths: JobPaths, backendConfigurationDescriptor: BackendConfigurationDescriptor) + private case class TestJobDescriptor(jobDescriptor: BackendJobDescriptor, jobPaths: JobPathsWithDocker, backendConfigurationDescriptor: BackendConfigurationDescriptor) trait MockWriter extends Writer { var closed = false diff --git a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkRuntimeAttributesSpec.scala b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkRuntimeAttributesSpec.scala index 33724cb6d..6167f0fd7 100644 --- a/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkRuntimeAttributesSpec.scala +++ b/supportedBackends/spark/src/test/scala/cromwell/backend/impl/spark/SparkRuntimeAttributesSpec.scala @@ -9,7 +9,7 @@ import wdl4s.WdlExpression._ import wdl4s.expression.NoFunctions import wdl4s.util.TryUtil import wdl4s.values.WdlValue -import wdl4s.{Call, WdlExpression, _} +import wdl4s.{Call, _} class SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { @@ -26,7 +26,7 @@ class 
SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { | RUNTIME |} | - |workflow hello { + |workflow wf_hello { | call hello |} """.stripMargin @@ -90,7 +90,7 @@ class SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { runtime: String) = { BackendWorkflowDescriptor( WorkflowId.randomId(), - NamespaceWithWorkflow.load(wdl.replaceAll("RUNTIME", runtime.format("appMainClass", "com.test.spark"))), + WdlNamespaceWithWorkflow.load(wdl.replaceAll("RUNTIME", runtime.format("appMainClass", "com.test.spark")), Seq.empty[ImportResolver]).workflow, inputs, options ) @@ -100,12 +100,11 @@ class SparkRuntimeAttributesSpec extends WordSpecLike with Matchers { val workflowDescriptor = buildWorkflowDescriptor(wdlSource, runtime = runtimeAttributes) def createLookup(call: Call): ScopedLookupFunction = { - val declarations = workflowDescriptor.workflowNamespace.workflow.declarations ++ call.task.declarations val knownInputs = workflowDescriptor.inputs - WdlExpression.standardLookupFunction(knownInputs, declarations, NoFunctions) + call.lookupFunction(knownInputs, NoFunctions) } - workflowDescriptor.workflowNamespace.workflow.calls map { + workflowDescriptor.workflow.taskCalls map { call => val ra = call.task.runtimeAttributes.attrs mapValues { _.evaluate(createLookup(call), NoFunctions) } TryUtil.sequenceMap(ra, "Runtime attributes evaluation").get