From a6c34ba2a79f26dbaffde6d1136e387c758fc53b Mon Sep 17 00:00:00 2001 From: Bob Bai Date: Sat, 30 May 2026 08:35:17 -0700 Subject: [PATCH 1/4] feat: route computing-unit metadata over HTTP, off Postgres MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The computing unit runs user-defined functions yet ships with Postgres credentials (issue #5011). Add an opt-in path so the CU performs no direct JDBC access: - Execution metadata (create execution; runtime-stats / console / result URIs; latest-execution and result-URI lookup) routes to the Dashboard Service via a new /api/internal/execution-metadata/* API and an HTTP client; dataset-path resolution routes to file-service's new /api/dataset/resolve. Both forward the user's JWT. - Execution-metadata routing is active when SqlServer is uninitialized in the process (the CU); dataset-path resolution is routed remotely when a forwarded user token is present, independent of the flag. ComputingUnitMaster skips SqlServer.initConnection and DB cleanup when EXECUTION_METADATA_REMOTE=true. With the flag off, execution-metadata behavior is unchanged — the Dashboard Service keeps the direct-DB path. Unit-tested both HTTP clients (RemoteDatasetResolver, RemoteExecutionMetadata). 
--- .../texera/web/ComputingUnitMaster.scala | 67 +++++--- .../texera/web/TexeraWebApplication.scala | 1 + .../InternalExecutionMetadataResource.scala | 161 ++++++++++++++++++ .../workflow/WorkflowExecutionsResource.scala | 90 +++++++--- .../ExecutionsMetadataPersistService.scala | 18 ++ .../web/service/RemoteExecutionMetadata.scala | 155 +++++++++++++++++ .../service/RemoteExecutionMetadataSpec.scala | 105 ++++++++++++ .../amber/config/EnvironmentalVariable.scala | 10 ++ .../org/apache/texera/dao/SqlServer.scala | 3 + .../amber/core/storage/FileResolver.scala | 5 + .../core/storage/RemoteDatasetResolver.scala | 89 ++++++++++ .../storage/RemoteDatasetResolverSpec.scala | 86 ++++++++++ .../service/resource/DatasetResource.scala | 20 +++ 13 files changed, 764 insertions(+), 46 deletions(-) create mode 100644 amber/src/main/scala/org/apache/texera/web/resource/InternalExecutionMetadataResource.scala create mode 100644 amber/src/main/scala/org/apache/texera/web/service/RemoteExecutionMetadata.scala create mode 100644 amber/src/test/scala/org/apache/texera/web/service/RemoteExecutionMetadataSpec.scala create mode 100644 common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/RemoteDatasetResolver.scala create mode 100644 common/workflow-core/src/test/scala/org/apache/texera/amber/core/storage/RemoteDatasetResolverSpec.scala diff --git a/amber/src/main/scala/org/apache/texera/web/ComputingUnitMaster.scala b/amber/src/main/scala/org/apache/texera/web/ComputingUnitMaster.scala index 41d8d3b5830..941585f720c 100644 --- a/amber/src/main/scala/org/apache/texera/web/ComputingUnitMaster.scala +++ b/amber/src/main/scala/org/apache/texera/web/ComputingUnitMaster.scala @@ -25,7 +25,7 @@ import io.dropwizard.Configuration import io.dropwizard.configuration.{EnvironmentVariableSubstitutor, SubstitutingSourceProvider} import io.dropwizard.setup.{Bootstrap, Environment} import io.dropwizard.websockets.WebsocketBundle -import 
org.apache.texera.amber.config.{ApplicationConfig, StorageConfig} +import org.apache.texera.amber.config.{ApplicationConfig, EnvironmentalVariable, StorageConfig} import org.apache.texera.amber.core.storage.DocumentFactory import org.apache.texera.amber.core.virtualidentity.ExecutionIdentity import org.apache.texera.amber.core.workflow.{PhysicalPlan, WorkflowContext} @@ -141,11 +141,20 @@ class ComputingUnitMaster extends io.dropwizard.Application[Configuration] with override def run(configuration: Configuration, environment: Environment): Unit = { ObjectMapperUtils.warmupObjectMapperForOperatorsSerde() - SqlServer.initConnection( - StorageConfig.jdbcUrl, - StorageConfig.jdbcUsername, - StorageConfig.jdbcPassword - ) + // In remote mode the CU routes execution-metadata operations over HTTP to the dashboard + // service and holds no Postgres credentials of its own (issue #5011). + val remote = + EnvironmentalVariable + .get(EnvironmentalVariable.ENV_EXECUTION_METADATA_REMOTE) + .contains("true") + + if (!remote) { + SqlServer.initConnection( + StorageConfig.jdbcUrl, + StorageConfig.jdbcUsername, + StorageConfig.jdbcPassword + ) + } environment.jersey.setUrlPattern("/api/*") @@ -177,28 +186,32 @@ class ComputingUnitMaster extends io.dropwizard.Application[Configuration] with new WebsocketPayloadSizeTuner(ApplicationConfig.maxWorkflowWebsocketRequestPayloadSizeKb) ) - val timeToLive: Int = ApplicationConfig.sinkStorageTTLInSecs - if (ApplicationConfig.cleanupAllExecutionResults) { - // do one time cleanup of collections that were not closed gracefully before restart/crash - // retrieve all executions that were executing before the reboot. - val allExecutionsBeforeRestart: List[WorkflowExecutions] = - WorkflowExecutionsResource.getExpiredExecutionsWithResultOrLog(-1) - cleanExecutions( - allExecutionsBeforeRestart, - statusByte => { - if (statusByte != maptoStatusCode(COMPLETED)) { - maptoStatusCode(FAILED) // for incomplete executions, mark them as failed. 
- } else { - statusByte + // Result/log cleanup needs a database connection and is owned by the dashboard service in + // remote mode, so skip it on the computing unit when running remotely. + if (!remote) { + val timeToLive: Int = ApplicationConfig.sinkStorageTTLInSecs + if (ApplicationConfig.cleanupAllExecutionResults) { + // do one time cleanup of collections that were not closed gracefully before restart/crash + // retrieve all executions that were executing before the reboot. + val allExecutionsBeforeRestart: List[WorkflowExecutions] = + WorkflowExecutionsResource.getExpiredExecutionsWithResultOrLog(-1) + cleanExecutions( + allExecutionsBeforeRestart, + statusByte => { + if (statusByte != maptoStatusCode(COMPLETED)) { + maptoStatusCode(FAILED) // for incomplete executions, mark them as failed. + } else { + statusByte + } } - } - ) - } - scheduleRecurringCallThroughActorSystem( - 2.seconds, - ApplicationConfig.sinkStorageCleanUpCheckIntervalInSecs.seconds - ) { - recurringCheckExpiredResults(timeToLive) + ) + } + scheduleRecurringCallThroughActorSystem( + 2.seconds, + ApplicationConfig.sinkStorageCleanUpCheckIntervalInSecs.seconds + ) { + recurringCheckExpiredResults(timeToLive) + } } environment.jersey.register(classOf[WorkflowExecutionsResource]) diff --git a/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala b/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala index 98b7c68c974..4854ae6cd9f 100644 --- a/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala +++ b/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala @@ -154,6 +154,7 @@ class TexeraWebApplication environment.jersey.register(classOf[ProjectResource]) environment.jersey.register(classOf[ProjectAccessResource]) environment.jersey.register(classOf[WorkflowExecutionsResource]) + environment.jersey.register(classOf[InternalExecutionMetadataResource]) environment.jersey.register(classOf[DashboardResource]) 
environment.jersey.register(classOf[GmailResource]) environment.jersey.register(classOf[AdminExecutionResource]) diff --git a/amber/src/main/scala/org/apache/texera/web/resource/InternalExecutionMetadataResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/InternalExecutionMetadataResource.scala new file mode 100644 index 00000000000..06f45a44ec2 --- /dev/null +++ b/amber/src/main/scala/org/apache/texera/web/resource/InternalExecutionMetadataResource.scala @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.web.resource + +import io.dropwizard.auth.Auth +import org.apache.texera.amber.core.virtualidentity.{ + ExecutionIdentity, + OperatorIdentity, + WorkflowIdentity +} +import org.apache.texera.amber.core.workflow.PortIdentity +import org.apache.texera.auth.SessionUser +import org.apache.texera.web.resource.dashboard.user.workflow.WorkflowExecutionsResource +import org.apache.texera.web.service.ExecutionsMetadataPersistService + +import java.net.URI +import javax.annotation.security.RolesAllowed +import javax.ws.rs._ +import javax.ws.rs.core.MediaType + +case class CreateExecutionRequest( + workflowId: Long, + uid: Option[Integer], + executionName: String, + environmentVersion: String, + computingUnitId: Integer +) + +case class CreateExecutionResponse(eid: Long) + +case class RuntimeStatsUriRequest(workflowId: Long, uri: String) + +case class OperatorConsoleUriRequest(operatorId: String, uri: String) + +case class PortResultUriRequest(globalPortId: String, uri: String) + +case class ResultUriResponse(uri: String) + +case class LatestExecutionResponse(eid: Int) + +/** + * Internal HTTP endpoints that the dashboard service exposes so a computing unit can perform + * execution-metadata operations without holding Postgres credentials (issue #5011). + * + * These endpoints run on the dashboard service, where `SqlServer` is initialized, so the companion + * methods they delegate to take their direct-DB branch and never recurse back to the remote client. 
+ */ +@Path("/internal/execution-metadata") +@Consumes(Array(MediaType.APPLICATION_JSON)) +@Produces(Array(MediaType.APPLICATION_JSON)) +class InternalExecutionMetadataResource { + + @POST + @Path("/create") + @RolesAllowed(Array("REGULAR", "ADMIN")) + def createExecution( + request: CreateExecutionRequest, + @Auth user: SessionUser + ): CreateExecutionResponse = { + val eid = ExecutionsMetadataPersistService.insertNewExecution( + WorkflowIdentity(request.workflowId), + request.uid, + request.executionName, + request.environmentVersion, + request.computingUnitId + ) + CreateExecutionResponse(eid.id.toLong) + } + + @PUT + @Path("/{eid}/runtime-stats-uri") + @RolesAllowed(Array("REGULAR", "ADMIN")) + def updateRuntimeStatsUri( + @PathParam("eid") eid: Long, + request: RuntimeStatsUriRequest, + @Auth user: SessionUser + ): Unit = { + WorkflowExecutionsResource.updateRuntimeStatsUri(request.workflowId, eid, new URI(request.uri)) + } + + @POST + @Path("/{eid}/operator-console") + @RolesAllowed(Array("REGULAR", "ADMIN")) + def insertOperatorConsoleUri( + @PathParam("eid") eid: Long, + request: OperatorConsoleUriRequest, + @Auth user: SessionUser + ): Unit = { + WorkflowExecutionsResource.insertOperatorExecutions( + eid, + request.operatorId, + new URI(request.uri) + ) + } + + @POST + @Path("/{eid}/port-result") + @RolesAllowed(Array("REGULAR", "ADMIN")) + def insertPortResultUri( + @PathParam("eid") eid: Long, + request: PortResultUriRequest, + @Auth user: SessionUser + ): Unit = { + WorkflowExecutionsResource.insertOperatorPortResultUriSerialized( + ExecutionIdentity(eid), + request.globalPortId, + new URI(request.uri) + ) + } + + @GET + @Path("/{eid}/port-result") + @RolesAllowed(Array("REGULAR", "ADMIN")) + def getResultUri( + @PathParam("eid") eid: Long, + @QueryParam("opId") opId: String, + @QueryParam("portId") portId: Int, + @QueryParam("internal") internal: Boolean, + @Auth user: SessionUser + ): ResultUriResponse = { + WorkflowExecutionsResource + 
.getResultUriByLogicalPortId( + ExecutionIdentity(eid), + OperatorIdentity(opId), + PortIdentity(portId, internal) + ) + .map(uri => ResultUriResponse(uri.toString)) + .getOrElse(throw new NotFoundException(s"No result URI found for execution $eid")) + } + + @GET + @Path("/latest") + @RolesAllowed(Array("REGULAR", "ADMIN")) + def getLatestExecutionId( + @QueryParam("wid") wid: Integer, + @QueryParam("cuid") cuid: Integer, + @Auth user: SessionUser + ): LatestExecutionResponse = { + WorkflowExecutionsResource + .getLatestExecutionID(wid, cuid) + .map(eid => LatestExecutionResponse(eid.intValue())) + .getOrElse(throw new NotFoundException(s"No execution found for workflow $wid")) + } +} diff --git a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/workflow/WorkflowExecutionsResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/workflow/WorkflowExecutionsResource.scala index 72fb1c364e5..086825d7742 100644 --- a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/workflow/WorkflowExecutionsResource.scala +++ b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/workflow/WorkflowExecutionsResource.scala @@ -43,7 +43,11 @@ import org.apache.texera.dao.jooq.generated.tables.daos.WorkflowExecutionsDao import org.apache.texera.dao.jooq.generated.tables.pojos.{WorkflowExecutions, User => UserPojo} import org.apache.texera.web.model.http.request.result.ResultExportRequest import org.apache.texera.web.resource.dashboard.user.workflow.WorkflowExecutionsResource._ -import org.apache.texera.web.service.{ExecutionsMetadataPersistService, ResultExportService} +import org.apache.texera.web.service.{ + ExecutionsMetadataPersistService, + RemoteExecutionMetadata, + ResultExportService +} import org.jooq.DSLContext import play.api.libs.json.Json @@ -68,6 +72,10 @@ object WorkflowExecutionsResource { } def getExpiredExecutionsWithResultOrLog(timeToLive: Int): List[WorkflowExecutions] = { + if 
(RemoteExecutionMetadata.enabled) { + // result/log cleanup is owned by the dashboard service in remote mode. + return List.empty + } val deadline = new Timestamp( System.currentTimeMillis() - TimeUnit.SECONDS.toMillis(timeToLive) ) @@ -93,6 +101,9 @@ object WorkflowExecutionsResource { * @return Integer */ def getLatestExecutionID(wid: Integer, cuid: Integer): Option[Integer] = { + if (RemoteExecutionMetadata.enabled) { + return RemoteExecutionMetadata.getLatestExecutionId(wid, cuid) + } val executions = context .select(WORKFLOW_EXECUTIONS.EID) .from(WORKFLOW_EXECUTIONS) @@ -238,6 +249,23 @@ object WorkflowExecutionsResource { eid: ExecutionIdentity, globalPortId: GlobalPortIdentity, uri: URI + ): Unit = { + if (RemoteExecutionMetadata.enabled) { + RemoteExecutionMetadata.insertPortResultUri( + eid.id.toLong, + globalPortId.serializeAsString, + uri + ) + } else { + insertOperatorPortResultUriSerialized(eid, globalPortId.serializeAsString, uri) + } + } + + /** Inserts a port-result row using an already-serialized globalPortId, bypassing re-serialization. 
*/ + def insertOperatorPortResultUriSerialized( + eid: ExecutionIdentity, + globalPortIdSerialized: String, + uri: URI ): Unit = { context .insertInto(OPERATOR_PORT_EXECUTIONS) @@ -246,7 +274,7 @@ object WorkflowExecutionsResource { OPERATOR_PORT_EXECUTIONS.GLOBAL_PORT_ID, OPERATOR_PORT_EXECUTIONS.RESULT_URI ) - .values(eid.id.toInt, globalPortId.serializeAsString, uri.toString) + .values(eid.id.toInt, globalPortIdSerialized, uri.toString) .execute() } @@ -255,6 +283,9 @@ object WorkflowExecutionsResource { opId: String, uri: URI ): Unit = { + if (RemoteExecutionMetadata.enabled) { + return RemoteExecutionMetadata.insertOperatorConsoleUri(eid, opId, uri) + } context .insertInto(OPERATOR_EXECUTIONS) .columns( @@ -267,6 +298,9 @@ object WorkflowExecutionsResource { } def updateRuntimeStatsUri(wid: Long, eid: Long, uri: URI): Unit = { + if (RemoteExecutionMetadata.enabled) { + return RemoteExecutionMetadata.updateRuntimeStatsUri(wid, eid, uri) + } context .update(WORKFLOW_EXECUTIONS) .set(WORKFLOW_EXECUTIONS.RUNTIME_STATS_URI, uri.toString) @@ -286,6 +320,9 @@ object WorkflowExecutionsResource { } def getResultUrisByExecutionId(eid: ExecutionIdentity): List[URI] = { + if (RemoteExecutionMetadata.enabled) { + return List.empty + } context .select(OPERATOR_PORT_EXECUTIONS.RESULT_URI) .from(OPERATOR_PORT_EXECUTIONS) @@ -298,25 +335,29 @@ object WorkflowExecutionsResource { } def getConsoleMessagesUriByExecutionId(eid: ExecutionIdentity): List[URI] = - context - .select(OPERATOR_EXECUTIONS.CONSOLE_MESSAGES_URI) - .from(OPERATOR_EXECUTIONS) - .where(OPERATOR_EXECUTIONS.WORKFLOW_EXECUTION_ID.eq(eid.id.toInt)) - .fetchInto(classOf[String]) - .asScala - .toList - .filter(uri => uri != null && uri.nonEmpty) - .map(URI.create) + if (RemoteExecutionMetadata.enabled) List.empty + else + context + .select(OPERATOR_EXECUTIONS.CONSOLE_MESSAGES_URI) + .from(OPERATOR_EXECUTIONS) + .where(OPERATOR_EXECUTIONS.WORKFLOW_EXECUTION_ID.eq(eid.id.toInt)) + .fetchInto(classOf[String]) + 
.asScala + .toList + .filter(uri => uri != null && uri.nonEmpty) + .map(URI.create) def getRuntimeStatsUriByExecutionId(eid: ExecutionIdentity): Option[URI] = - Option( - context - .select(WORKFLOW_EXECUTIONS.RUNTIME_STATS_URI) - .from(WORKFLOW_EXECUTIONS) - .where(WORKFLOW_EXECUTIONS.EID.eq(eid.id.toInt)) - .fetchOneInto(classOf[String]) - ).filter(_.nonEmpty) - .map(URI.create) + if (RemoteExecutionMetadata.enabled) None + else + Option( + context + .select(WORKFLOW_EXECUTIONS.RUNTIME_STATS_URI) + .from(WORKFLOW_EXECUTIONS) + .where(WORKFLOW_EXECUTIONS.EID.eq(eid.id.toInt)) + .fetchOneInto(classOf[String]) + ).filter(_.nonEmpty) + .map(URI.create) def getWorkflowExecutions( wid: Integer, @@ -359,6 +400,9 @@ object WorkflowExecutionsResource { } def deleteConsoleMessageAndExecutionResultUris(eid: ExecutionIdentity): Unit = { + if (RemoteExecutionMetadata.enabled) { + return + } context .delete(OPERATOR_PORT_EXECUTIONS) .where(OPERATOR_PORT_EXECUTIONS.WORKFLOW_EXECUTION_ID.eq(eid.id.toInt)) @@ -486,6 +530,14 @@ object WorkflowExecutionsResource { opId: OperatorIdentity, portId: PortIdentity ): Option[URI] = { + if (RemoteExecutionMetadata.enabled) { + return RemoteExecutionMetadata.getResultUri( + eid.id.toLong, + opId.id, + portId.id, + portId.internal + ) + } def isMatchingExternalPortURI(uri: URI): Boolean = { val (_, _, globalPortIdOption, resourceType) = VFSURIFactory.decodeURI(uri) globalPortIdOption.exists { globalPortId => diff --git a/amber/src/main/scala/org/apache/texera/web/service/ExecutionsMetadataPersistService.scala b/amber/src/main/scala/org/apache/texera/web/service/ExecutionsMetadataPersistService.scala index b9b29dff72f..5423cc291e5 100644 --- a/amber/src/main/scala/org/apache/texera/web/service/ExecutionsMetadataPersistService.scala +++ b/amber/src/main/scala/org/apache/texera/web/service/ExecutionsMetadataPersistService.scala @@ -57,6 +57,15 @@ object ExecutionsMetadataPersistService extends LazyLogging { environmentVersion: String, 
computingUnitId: Integer ): ExecutionIdentity = { + if (RemoteExecutionMetadata.enabled) { + return RemoteExecutionMetadata.createExecution( + workflowId.id, + uid, + executionName, + environmentVersion, + computingUnitId + ) + } // first retrieve the latest version of this workflow val vid = getLatestVersion(workflowId.id.toInt) val newExecution = new WorkflowExecutions() @@ -76,6 +85,10 @@ object ExecutionsMetadataPersistService extends LazyLogging { } def tryGetExistingExecution(executionId: ExecutionIdentity): Option[WorkflowExecutions] = { + if (RemoteExecutionMetadata.enabled) { + // degraded: previous-execution lookup is not needed on the computing unit in remote mode. + return None + } try { Option(workflowExecutionsDao.fetchOneByEid(executionId.id.toInt)) } catch { @@ -88,6 +101,11 @@ object ExecutionsMetadataPersistService extends LazyLogging { def tryUpdateExistingExecution( executionId: ExecutionIdentity )(updateFunc: WorkflowExecutions => Unit): Unit = { + if (RemoteExecutionMetadata.enabled) { + // no-op: execution-status updates are owned by the dashboard service in remote mode. + logger.debug("Skipping execution update in remote mode.") + return + } try { val execution = workflowExecutionsDao.fetchOneByEid(executionId.id.toInt) updateFunc(execution) diff --git a/amber/src/main/scala/org/apache/texera/web/service/RemoteExecutionMetadata.scala b/amber/src/main/scala/org/apache/texera/web/service/RemoteExecutionMetadata.scala new file mode 100644 index 00000000000..233aca20027 --- /dev/null +++ b/amber/src/main/scala/org/apache/texera/web/service/RemoteExecutionMetadata.scala @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.web.service + +import org.apache.texera.amber.config.EnvironmentalVariable +import org.apache.texera.amber.core.virtualidentity.ExecutionIdentity +import org.apache.texera.amber.util.JSONUtils.objectMapper + +import java.net.{HttpURLConnection, URI, URL, URLEncoder} +import java.nio.charset.StandardCharsets + +/** + * Routes execution-metadata operations to the dashboard service over HTTP, instead of querying + * Postgres directly. + * + * This lets a computing-unit pod — which runs user-defined functions and therefore must not hold + * database credentials (issue #5011) — persist and read execution metadata without a JDBC + * connection. + * + * Remote mode is active exactly when `SqlServer` is NOT initialized in this process: the CU skips + * initializing it, while the dashboard service keeps initializing it, so the dashboard service + * always takes the direct-DB path and the metadata endpoints never recurse back onto themselves. 
+ */ +object RemoteExecutionMetadata { + + private lazy val userJwtToken: String = + sys.env.getOrElse(EnvironmentalVariable.ENV_USER_JWT_TOKEN, "").trim + + private lazy val baseEndpoint: String = + sys.env + .get(EnvironmentalVariable.ENV_DASHBOARD_SERVICE_EXECUTION_METADATA_ENDPOINT) + .map(_.trim) + .filter(_.nonEmpty) + .getOrElse("http://localhost:8080/api/internal/execution-metadata") + + /** Remote routing is active only when this process has no database connection of its own. */ + def enabled: Boolean = !org.apache.texera.dao.SqlServer.isInitialized + + def createExecution( + workflowId: Long, + uid: Option[Integer], + executionName: String, + environmentVersion: String, + computingUnitId: Integer + ): ExecutionIdentity = { + val body = objectMapper.createObjectNode() + body.put("workflowId", workflowId) + uid match { + case Some(value) => body.put("uid", value.intValue()) + case None => body.putNull("uid") + } + body.put("executionName", executionName) + body.put("environmentVersion", environmentVersion) + body.put("computingUnitId", computingUnitId.intValue()) + val response = request("POST", "/create", Some(body.toString)) + .getOrElse( + throw new RuntimeException("dashboard service returned no body for execution create") + ) + ExecutionIdentity(objectMapper.readTree(response).get("eid").asLong()) + } + + def updateRuntimeStatsUri(wid: Long, eid: Long, uri: URI): Unit = { + val body = objectMapper.createObjectNode() + body.put("workflowId", wid) + body.put("uri", uri.toString) + request("PUT", s"/$eid/runtime-stats-uri", Some(body.toString)) + } + + def insertOperatorConsoleUri(eid: Long, operatorId: String, uri: URI): Unit = { + val body = objectMapper.createObjectNode() + body.put("operatorId", operatorId) + body.put("uri", uri.toString) + request("POST", s"/$eid/operator-console", Some(body.toString)) + } + + def insertPortResultUri(eid: Long, globalPortIdSerialized: String, uri: URI): Unit = { + val body = objectMapper.createObjectNode() + 
body.put("globalPortId", globalPortIdSerialized) + body.put("uri", uri.toString) + request("POST", s"/$eid/port-result", Some(body.toString)) + } + + def getResultUri( + eid: Long, + opId: String, + portIdId: Int, + portIdInternal: Boolean + ): Option[URI] = { + val path = + s"/$eid/port-result?opId=${enc(opId)}&portId=$portIdId&internal=$portIdInternal" + request("GET", path, None) + .map(response => new URI(objectMapper.readTree(response).get("uri").asText())) + } + + def getLatestExecutionId(wid: Integer, cuid: Integer): Option[Integer] = { + request("GET", s"/latest?wid=$wid&cuid=$cuid", None) + .map(response => Integer.valueOf(objectMapper.readTree(response).get("eid").asInt())) + } + + private def enc(value: String): String = + URLEncoder.encode(value, StandardCharsets.UTF_8.name()) + + private def request(method: String, path: String, body: Option[String]): Option[String] = + request(baseEndpoint, userJwtToken, method, path, body) + + /** Endpoint/token are injectable so the HTTP round-trip can be unit-tested in isolation. 
*/ + private[service] def request( + baseEndpoint: String, + token: String, + method: String, + path: String, + body: Option[String] + ): Option[String] = { + val connection = new URL(baseEndpoint + path).openConnection().asInstanceOf[HttpURLConnection] + connection.setRequestMethod(method) + connection.setRequestProperty("Authorization", s"Bearer $token") + connection.setRequestProperty("Content-Type", "application/json") + try { + body.foreach { payload => + connection.setDoOutput(true) + connection.getOutputStream.write(payload.getBytes(StandardCharsets.UTF_8)) + } + val code = connection.getResponseCode + if (code >= 200 && code < 300) { + val stream = connection.getInputStream + Some(new String(stream.readAllBytes(), StandardCharsets.UTF_8)) + } else if (code == HttpURLConnection.HTTP_NOT_FOUND) { + None + } else { + throw new RuntimeException( + s"dashboard service execution-metadata request failed: $method $path (HTTP $code)" + ) + } + } finally { + connection.disconnect() + } + } +} diff --git a/amber/src/test/scala/org/apache/texera/web/service/RemoteExecutionMetadataSpec.scala b/amber/src/test/scala/org/apache/texera/web/service/RemoteExecutionMetadataSpec.scala new file mode 100644 index 00000000000..39e0ecf0659 --- /dev/null +++ b/amber/src/test/scala/org/apache/texera/web/service/RemoteExecutionMetadataSpec.scala @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.web.service + +import com.sun.net.httpserver.{HttpExchange, HttpHandler, HttpServer} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +import java.net.InetSocketAddress +import java.nio.charset.StandardCharsets + +/** + * Unit tests for the HTTP core of [[RemoteExecutionMetadata]] — the layer the computing unit uses + * to read/write execution metadata over HTTP instead of JDBC (issue #5011). Every public method + * routes through this `request` overload, so its method/auth/body forwarding and 2xx/404/error + * handling are what matters. 
+ */ +class RemoteExecutionMetadataSpec extends AnyFlatSpec with Matchers { + + private def withServer(handler: HttpExchange => Unit)(test: String => Unit): Unit = { + val server = HttpServer.create(new InetSocketAddress("127.0.0.1", 0), 0) + server.createContext( + "/", + new HttpHandler { override def handle(exchange: HttpExchange): Unit = handler(exchange) } + ) + server.start() + val base = s"http://127.0.0.1:${server.getAddress.getPort}" + try test(base) + finally server.stop(0) + } + + private def respond(exchange: HttpExchange, status: Int, body: String): Unit = { + val bytes = body.getBytes(StandardCharsets.UTF_8) + if (bytes.isEmpty) exchange.sendResponseHeaders(status, -1) + else { + exchange.sendResponseHeaders(status, bytes.length.toLong) + exchange.getResponseBody.write(bytes) + } + exchange.close() + } + + "RemoteExecutionMetadata.request" should "forward the method, bearer token and body, and return a 2xx body" in { + var seenMethod: String = null + var seenAuth: String = null + var seenBody: String = null + var seenPath: String = null + withServer { exchange => + seenMethod = exchange.getRequestMethod + seenAuth = exchange.getRequestHeaders.getFirst("Authorization") + seenPath = exchange.getRequestURI.toString + seenBody = new String(exchange.getRequestBody.readAllBytes(), StandardCharsets.UTF_8) + respond(exchange, 200, """{"eid":7}""") + } { base => + val result = RemoteExecutionMetadata.request( + base, + "tok-1", + "POST", + "/create", + Some("""{"workflowId":3}""") + ) + result shouldBe Some("""{"eid":7}""") + seenMethod shouldBe "POST" + seenAuth shouldBe "Bearer tok-1" + seenPath shouldBe "/create" + seenBody should include("workflowId") + } + } + + it should "return None for a 404 (e.g. 
no result URI yet)" in { + withServer { exchange => respond(exchange, 404, "") } { base => + RemoteExecutionMetadata.request( + base, + "tok", + "GET", + "/1/port-result?opId=a&portId=0&internal=false", + None + ) shouldBe None + } + } + + it should "throw for a non-2xx, non-404 status" in { + withServer { exchange => respond(exchange, 500, "boom") } { base => + val ex = the[RuntimeException] thrownBy + RemoteExecutionMetadata.request(base, "tok", "PUT", "/1/runtime-stats-uri", Some("{}")) + ex.getMessage should include("500") + } + } +} diff --git a/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala b/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala index 123c56505ee..b3a57515919 100644 --- a/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala +++ b/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala @@ -38,6 +38,16 @@ object EnvironmentalVariable { val ENV_FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT = "FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT" val ENV_FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT = "FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT" + // Endpoint that resolves a dataset path to its dataset:/// URI, so a computing unit can + // resolve datasets without holding Postgres credentials (issue #5011). + val ENV_FILE_SERVICE_RESOLVE_PATH_ENDPOINT = "FILE_SERVICE_RESOLVE_PATH_ENDPOINT" + + // Endpoint of the dashboard service that handles execution-metadata operations over HTTP, so a + // computing unit can persist/read execution metadata without holding Postgres credentials (#5011). + val ENV_DASHBOARD_SERVICE_EXECUTION_METADATA_ENDPOINT = + "DASHBOARD_SERVICE_EXECUTION_METADATA_ENDPOINT" + // When "true", the computing unit routes execution-metadata operations over HTTP instead of JDBC. 
+ val ENV_EXECUTION_METADATA_REMOTE = "EXECUTION_METADATA_REMOTE" /** * Auth related vars diff --git a/common/dao/src/main/scala/org/apache/texera/dao/SqlServer.scala b/common/dao/src/main/scala/org/apache/texera/dao/SqlServer.scala index 6348ae41fa0..75b6fda7207 100644 --- a/common/dao/src/main/scala/org/apache/texera/dao/SqlServer.scala +++ b/common/dao/src/main/scala/org/apache/texera/dao/SqlServer.scala @@ -86,6 +86,9 @@ object SqlServer { instance.get } + /** Whether a database connection has been initialized in this process. */ + def isInitialized: Boolean = instance.isDefined + /** * A utility function for create a transaction block using given sql context * @param dsl the sql context diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala index c8a407df993..075b9889be2 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala @@ -113,6 +113,11 @@ object FileResolver { * @throws java.io.FileNotFoundException if the dataset file does not exist or cannot be created */ private def datasetResolveFunc(fileName: String): URI = { + // Inside a computing-unit pod (which runs user code and must not hold DB credentials), + // resolve through the file-service over HTTP instead of querying Postgres directly. 
+ if (RemoteDatasetResolver.enabled) { + return RemoteDatasetResolver.resolve(fileName) + } val (ownerEmail, datasetName, versionName, fileRelativePathSegments) = parseDatasetFilePath(fileName).getOrElse( throw new FileNotFoundException(s"Dataset file $fileName not found.") diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/RemoteDatasetResolver.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/RemoteDatasetResolver.scala new file mode 100644 index 00000000000..f9304f4fb7f --- /dev/null +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/RemoteDatasetResolver.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.core.storage + +import org.apache.texera.amber.config.EnvironmentalVariable + +import java.net.{HttpURLConnection, URI, URL, URLEncoder} +import java.nio.charset.StandardCharsets + +/** + * Resolves a dataset file path (`/ownerEmail/datasetName/versionName/relativePath`) to its + * `dataset:///` URI by calling the file-service over HTTP, instead of querying Postgres directly. 
+ * + * This lets a computing-unit pod — which runs user-defined functions and therefore must not hold + * database credentials (issue #5011) — resolve datasets without a JDBC connection. + * + * Following the same convention as [[org.apache.texera.amber.core.storage.model.DatasetFileDocument]], + * the remote path is taken only when a forwarded `USER_JWT_TOKEN` is present (i.e. inside a CU pod). + * Backend services (which own the database) have no such token, so resolution stays local there and + * the file-service endpoint never recurses back onto itself. + */ +object RemoteDatasetResolver { + + private lazy val userJwtToken: String = + sys.env.getOrElse(EnvironmentalVariable.ENV_USER_JWT_TOKEN, "").trim + + /** + * The file-service endpoint that resolves a dataset path. If not set explicitly, it is derived + * from the presigned-URL endpoint the CU pod already receives, so no extra configuration is + * required for the resolution to work wherever file reads already work. + */ + private lazy val resolveEndpoint: String = + sys.env + .get(EnvironmentalVariable.ENV_FILE_SERVICE_RESOLVE_PATH_ENDPOINT) + .map(_.trim) + .filter(_.nonEmpty) + .orElse( + sys.env + .get(EnvironmentalVariable.ENV_FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT) + .map(_.trim.replace("/presign-download", "/resolve")) + ) + .getOrElse("http://localhost:9092/api/dataset/resolve") + + /** Remote resolution is active only when this process carries a forwarded user token. */ + def enabled: Boolean = userJwtToken.nonEmpty + + /** + * @throws java.io.IOException / RuntimeException if the file-service cannot resolve the path. + */ + def resolve(datasetPath: String): URI = resolve(resolveEndpoint, userJwtToken, datasetPath) + + /** Endpoint/token are injectable so the HTTP round-trip can be unit-tested in isolation. 
*/ + private[storage] def resolve(endpoint: String, token: String, datasetPath: String): URI = { + val requestUrl = + s"$endpoint?path=${URLEncoder.encode(datasetPath, StandardCharsets.UTF_8.name())}" + val connection = new URL(requestUrl).openConnection().asInstanceOf[HttpURLConnection] + connection.setRequestMethod("GET") + connection.setRequestProperty("Authorization", s"Bearer $token") + try { + val code = connection.getResponseCode + if (code != HttpURLConnection.HTTP_OK) { + throw new RuntimeException( + s"file-service failed to resolve dataset path '$datasetPath' (HTTP $code)" + ) + } + val body = new String(connection.getInputStream.readAllBytes(), StandardCharsets.UTF_8).trim + new URI(body) + } finally { + connection.disconnect() + } + } +} diff --git a/common/workflow-core/src/test/scala/org/apache/texera/amber/core/storage/RemoteDatasetResolverSpec.scala b/common/workflow-core/src/test/scala/org/apache/texera/amber/core/storage/RemoteDatasetResolverSpec.scala new file mode 100644 index 00000000000..3c9835a9b9c --- /dev/null +++ b/common/workflow-core/src/test/scala/org/apache/texera/amber/core/storage/RemoteDatasetResolverSpec.scala @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.amber.core.storage + +import com.sun.net.httpserver.{HttpExchange, HttpHandler, HttpServer} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +import java.net.InetSocketAddress +import java.nio.charset.StandardCharsets + +class RemoteDatasetResolverSpec extends AnyFlatSpec with Matchers { + + /** Starts a throwaway in-process HTTP server at /api/dataset/resolve and runs `test` with its URL. */ + private def withResolveServer( + handler: HttpExchange => Unit + )(test: String => Unit): Unit = { + val server = HttpServer.create(new InetSocketAddress("127.0.0.1", 0), 0) + server.createContext( + "/api/dataset/resolve", + new HttpHandler { override def handle(exchange: HttpExchange): Unit = handler(exchange) } + ) + server.start() + val endpoint = s"http://127.0.0.1:${server.getAddress.getPort}/api/dataset/resolve" + try test(endpoint) + finally server.stop(0) + } + + private def respond(exchange: HttpExchange, status: Int, body: String): Unit = { + val bytes = body.getBytes(StandardCharsets.UTF_8) + exchange.sendResponseHeaders(status, bytes.length.toLong) + exchange.getResponseBody.write(bytes) + exchange.close() + } + + "RemoteDatasetResolver" should "GET the file-service with the bearer token and parse the returned URI" in { + var seenAuth: String = null + var seenQuery: String = null + withResolveServer { exchange => + seenAuth = exchange.getRequestHeaders.getFirst("Authorization") + seenQuery = exchange.getRequestURI.getQuery + respond(exchange, 200, "dataset:///dataset-15/abc123/california/tw1.csv") + } { endpoint => + val uri = + RemoteDatasetResolver.resolve( + endpoint, + "tok-123", + "/bob@texera.com/twitter/v1/california/tw1.csv" + ) + + uri.toString shouldBe "dataset:///dataset-15/abc123/california/tw1.csv" + uri.getScheme shouldBe "dataset" + seenAuth shouldBe "Bearer tok-123" + // the dataset path is URL-encoded into the `path` query parameter + seenQuery should include("path=") + 
seenQuery should include("twitter") + } + } + + it should "fail when the file-service returns a non-200 status" in { + withResolveServer { exchange => + respond(exchange, 404, "not found") + } { endpoint => + val ex = the[RuntimeException] thrownBy + RemoteDatasetResolver.resolve(endpoint, "tok", "/owner/ds/v1/missing.csv") + ex.getMessage should include("404") + } + } +} diff --git a/file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala b/file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala index 987c2e59d65..20058e9a04e 100644 --- a/file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala +++ b/file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala @@ -640,6 +640,26 @@ class DatasetResource extends LazyLogging { generatePresignedResponse(encodedUrl, repositoryName, commitHash, uid) } + /** + * Resolves a dataset path (`/ownerEmail/datasetName/versionName/relativePath`) to its + * `dataset:///` URI and returns it as plain text. This is the database-backed counterpart of + * [[org.apache.texera.amber.core.storage.RemoteDatasetResolver]], letting a computing unit + * resolve datasets over HTTP instead of querying Postgres directly (issue #5011). 
+ */ + @GET + @RolesAllowed(Array("REGULAR", "ADMIN")) + @Path("/resolve") + @Produces(Array(MediaType.TEXT_PLAIN)) + def resolveDatasetPath( + @QueryParam("path") path: String, + @Auth user: SessionUser + ): String = { + if (path == null || path.trim.isEmpty) { + throw new BadRequestException("query parameter 'path' is required") + } + FileResolver.resolve(path).toString + } + @GET @RolesAllowed(Array("REGULAR", "ADMIN")) @Path("/presign-download-s3") From 69f5941902d8e76a2f5b7d4887a603670041ad06 Mon Sep 17 00:00:00 2001 From: Bob Bai Date: Sat, 30 May 2026 14:40:08 -0700 Subject: [PATCH 2/4] feat: offload workflow compilation from ComputingUnitMaster to compiling-service The computing unit re-compiled logical plans in-process (issue #5011), keeping amber's WorkflowCompiler on the execution path. Route compilation to the workflow-compiling-service over HTTP so the CU just runs a ready-made plan: - WorkflowExecutionService.executeWorkflow POSTs the logical plan to /api/compile (CompilingServiceClient) and runs the returned PhysicalPlan; the runtime no longer needs the logical plan (Workflow holds None). A failed compile now reports the error and returns instead of dereferencing a null workflow. The dead in-process validateWorkflow is removed. - PhysicalPlan is made JSON-serializable end to end: operator partition logic moves from closures to a DerivePartitionSpec ADT, with custom serdes for OpExecInitInfo, LocationPreference, OutputMode, and per-port PhysicalOp views, registered in a shared PhysicalPlanSerdeModule. WorkflowCompilationResource accepts workflow/execution ids and returns a typed success/failure response. - Fix a latent LogicalLink round-trip (issue #5042): fromOpId / toOpId now serialize as bare strings (the shape the @JsonCreator constructor reads), so a re-serialized plan deserializes again. Adds round-trip serde tests (hand-built, compiler-produced, and a thorough multi-operator suite) and the LogicalLink round-trip regression. 
--- .../architecture/controller/Workflow.scala | 5 +- .../web/resource/SyncExecutionResource.scala | 26 +- .../web/service/CompilingServiceClient.scala | 154 ++++++++ .../ExecutionReconfigurationService.scala | 42 ++- .../service/WorkflowExecutionService.scala | 25 +- .../texera/web/service/WorkflowService.scala | 2 +- .../apache/texera/workflow/LogicalLink.scala | 32 ++ .../texera/workflow/WorkflowCompiler.scala | 2 +- .../amber/engine/e2e/DataProcessingSpec.scala | 2 +- .../texera/amber/engine/e2e/TestUtils.scala | 2 +- .../texera/workflow/LogicalLinkSpec.scala | 37 +- .../PhysicalPlanCompiledRoundTripSpec.scala | 148 ++++++++ .../PhysicalPlanRoundTripThoroughSpec.scala | 196 ++++++++++ .../workflow/WorkflowCompilerSpec.scala | 2 +- .../amber/config/EnvironmentalVariable.scala | 4 + .../core/workflow/DerivePartitionSpec.scala | 110 ++++++ .../core/workflow/LocationPreference.scala | 19 +- .../amber/core/workflow/PhysicalOp.scala | 137 +++++-- .../amber/core/workflow/PhysicalPlan.scala | 7 + .../apache/texera/amber/util/JSONUtils.scala | 9 +- .../amber/util/PhysicalPlanSerdeModule.scala | 70 ++++ .../util/serde/LocationPreferenceSerde.scala | 88 +++++ .../util/serde/OpExecInitInfoSerde.scala | 141 +++++++ .../amber/util/serde/OutputModeSerde.scala | 71 ++++ .../util/serde/PhysicalOpDeserializer.scala | 196 ++++++++++ .../util/serde/PhysicalOpSerializer.scala | 119 ++++++ .../util/serde/PhysicalPlanDeserializer.scala | 95 +++++ .../PhysicalPlanSerializationSpec.scala | 344 ++++++++++++++++++ .../operator/aggregate/AggregateOpDesc.scala | 2 +- .../difference/DifferenceOpDesc.scala | 2 +- .../operator/distinct/DistinctOpDesc.scala | 10 +- .../operator/hashJoin/HashJoinOpDesc.scala | 2 +- .../operator/intersect/IntersectOpDesc.scala | 2 +- .../projection/ProjectionOpDesc.scala | 17 +- .../SymmetricDifferenceOpDesc.scala | 2 +- .../operator/udf/java/JavaUDFOpDesc.scala | 4 +- .../DualInputPortsPythonUDFOpDescV2.scala | 2 +- .../udf/python/PythonUDFOpDescV2.scala | 2 +- 
.../amber/operator/udf/r/RUDFOpDesc.scala | 2 +- .../service/WorkflowCompilingService.scala | 12 +- .../WorkflowCompilationResource.scala | 21 +- 41 files changed, 2025 insertions(+), 140 deletions(-) create mode 100644 amber/src/main/scala/org/apache/texera/web/service/CompilingServiceClient.scala create mode 100644 amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanCompiledRoundTripSpec.scala create mode 100644 amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanRoundTripThoroughSpec.scala create mode 100644 common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/DerivePartitionSpec.scala create mode 100644 common/workflow-core/src/main/scala/org/apache/texera/amber/util/PhysicalPlanSerdeModule.scala create mode 100644 common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/LocationPreferenceSerde.scala create mode 100644 common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/OpExecInitInfoSerde.scala create mode 100644 common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/OutputModeSerde.scala create mode 100644 common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/PhysicalOpDeserializer.scala create mode 100644 common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/PhysicalOpSerializer.scala create mode 100644 common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/PhysicalPlanDeserializer.scala create mode 100644 common/workflow-core/src/test/scala/org/apache/texera/amber/core/workflow/PhysicalPlanSerializationSpec.scala diff --git a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/Workflow.scala b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/Workflow.scala index 4c2220ae9aa..6644a6c5a6d 100644 --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/Workflow.scala +++ 
b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/Workflow.scala @@ -24,6 +24,9 @@ import org.apache.texera.workflow.LogicalPlan case class Workflow( context: WorkflowContext, - logicalPlan: LogicalPlan, + // The logical plan is only retained for in-JVM compilation paths (amber's WorkflowCompiler, + // e2e TestUtils). The runtime offloads compilation to the workflow-compiling-service over HTTP + // and builds a Workflow with `logicalPlan = None`, so every runtime reader must handle `None`. + logicalPlan: Option[LogicalPlan] = None, physicalPlan: PhysicalPlan ) diff --git a/amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala index d3047db5802..359b2929c27 100644 --- a/amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala +++ b/amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala @@ -32,7 +32,7 @@ import org.apache.texera.amber.core.virtualidentity.{ OperatorIdentity, WorkflowIdentity } -import org.apache.texera.amber.core.workflow.{PortIdentity, WorkflowContext, WorkflowSettings} +import org.apache.texera.amber.core.workflow.{PortIdentity, WorkflowSettings} import org.apache.texera.amber.engine.architecture.rpc.controlcommands.{ ConsoleMessage, ConsoleMessageType @@ -49,7 +49,7 @@ import org.apache.texera.auth.SessionUser import org.apache.texera.dao.SqlServer import org.apache.texera.dao.jooq.generated.Tables.OPERATOR_EXECUTIONS import org.apache.texera.web.model.websocket.request.{LogicalPlanPojo, WorkflowExecuteRequest} -import org.apache.texera.workflow.{LogicalLink, WorkflowCompiler} +import org.apache.texera.workflow.LogicalLink import org.apache.texera.web.resource.dashboard.user.workflow.WorkflowExecutionsResource import org.apache.texera.web.service.{ExecutionResultService, WorkflowService} import org.apache.texera.web.storage.ExecutionStateStore.updateWorkflowState @@ 
-887,28 +887,6 @@ class SyncExecutionResource extends LazyLogging { ) } - // Returns operator-id -> error message; empty map means compilation succeeded. - private def validateWorkflow( - workflowId: Long, - logicalPlan: LogicalPlanPojo - ): Map[String, String] = { - try { - val tempContext = new WorkflowContext(WorkflowIdentity(workflowId)) - val compiler = new WorkflowCompiler(tempContext) - compiler.compile(logicalPlan) - Map.empty - } catch { - case e: Exception => - val errorMsg = Option(e.getMessage).getOrElse("Compilation failed") - val operatorIdPattern = """operator[- ]?(\S+)""".r - val operatorId = operatorIdPattern - .findFirstMatchIn(errorMsg.toLowerCase) - .map(_.group(1)) - .getOrElse("workflow") - Map(operatorId -> errorMsg) - } - } - @GET @Path("/health") def healthCheck: Map[String, String] = Map("status" -> "ok") diff --git a/amber/src/main/scala/org/apache/texera/web/service/CompilingServiceClient.scala b/amber/src/main/scala/org/apache/texera/web/service/CompilingServiceClient.scala new file mode 100644 index 00000000000..ba7022b90f2 --- /dev/null +++ b/amber/src/main/scala/org/apache/texera/web/service/CompilingServiceClient.scala @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.web.service + +import com.fasterxml.jackson.databind.JsonNode +import org.apache.texera.amber.config.EnvironmentalVariable +import org.apache.texera.amber.core.workflow.PhysicalPlan +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.apache.texera.web.model.websocket.request.LogicalPlanPojo + +import java.net.{HttpURLConnection, URL} +import java.nio.charset.StandardCharsets +import scala.jdk.CollectionConverters.IteratorHasAsScala + +/** + * Compiles a workflow (logical plan -> physical plan) by calling the workflow-compiling-service's + * `POST /api/compile` over HTTP, instead of running [[org.apache.texera.workflow.WorkflowCompiler]] + * in-process. + * + * This lets the computing-unit master offload compilation to a dedicated service: it serializes the + * [[LogicalPlanPojo]] with [[objectMapper]], posts it with the forwarded `USER_JWT_TOKEN`, and reads + * back the `WorkflowCompilationResponse` union (`success` / `failure`). On `success` it returns the + * deserialized [[PhysicalPlan]] (the same JSONUtils serializers are registered on both ends, so the + * plan round-trips); on `failure` it raises a [[RuntimeException]] carrying the per-operator errors, + * which the caller's existing catch surfaces to the frontend exactly like an in-process failure. + * + * The compiling-service's response classes live in a sibling module that amber does not depend on, + * so the response is parsed from a JSON tree by its `type` discriminator rather than via those + * classes. 
+ */ +object CompilingServiceClient { + + private lazy val userJwtToken: String = + sys.env.getOrElse(EnvironmentalVariable.ENV_USER_JWT_TOKEN, "").trim + + private lazy val endpoint: String = + sys.env + .get(EnvironmentalVariable.ENV_WORKFLOW_COMPILING_SERVICE_ENDPOINT) + .map(_.trim) + .filter(_.nonEmpty) + .getOrElse("http://localhost:9090/api/compile") + + /** + * @throws RuntimeException if compilation fails (message includes the per-operator errors) or the + * service returns a non-2xx response. + */ + def compile( + logicalPlan: LogicalPlanPojo, + workflowId: Long, + executionId: Long + ): PhysicalPlan = compile(endpoint, userJwtToken, logicalPlan, workflowId, executionId) + + /** Endpoint/token are injectable so the HTTP round-trip can be unit-tested in isolation. */ + private[service] def compile( + endpoint: String, + token: String, + logicalPlan: LogicalPlanPojo, + workflowId: Long, + executionId: Long + ): PhysicalPlan = { + val requestUrl = s"$endpoint?workflowId=$workflowId&executionId=$executionId" + val payload = objectMapper.writeValueAsBytes(logicalPlan) + val connection = new URL(requestUrl).openConnection().asInstanceOf[HttpURLConnection] + connection.setRequestMethod("POST") + connection.setRequestProperty("Authorization", s"Bearer $token") + connection.setRequestProperty("Content-Type", "application/json") + connection.setDoOutput(true) + try { + connection.getOutputStream.write(payload) + val code = connection.getResponseCode + if (code < 200 || code >= 300) { + val errorBody = readBody(connection.getErrorStream) + throw new RuntimeException( + s"workflow-compiling-service /compile failed (HTTP $code)" + + (if (errorBody.nonEmpty) s": $errorBody" else "") + ) + } + val response = + objectMapper.readTree(connection.getInputStream.readAllBytes()) + parseResponse(response) + } finally { + connection.disconnect() + } + } + + /** + * Interprets the `WorkflowCompilationResponse` union: returns the [[PhysicalPlan]] on `success`, + * throws with the 
collected operator errors on `failure`. + */ + private def parseResponse(response: JsonNode): PhysicalPlan = { + val responseType = Option(response.get("type")).map(_.asText()).getOrElse("") + responseType match { + case "success" => + val planNode = response.get("physicalPlan") + if (planNode == null || planNode.isNull) { + throw new RuntimeException( + "workflow-compiling-service returned a success response without a physicalPlan" + ) + } + objectMapper.treeToValue(planNode, classOf[PhysicalPlan]) + + case "failure" => + throw new RuntimeException( + s"Workflow compilation failed: ${formatOperatorErrors(response.get("operatorErrors"))}" + ) + + case other => + throw new RuntimeException( + s"workflow-compiling-service returned an unrecognized response type '$other'" + ) + } + } + + /** Flattens `operatorErrors: Map[operatorId, WorkflowFatalError]` into a readable message. */ + private def formatOperatorErrors(errorsNode: JsonNode): String = { + if (errorsNode == null || !errorsNode.isObject || errorsNode.isEmpty) { + "unknown compilation error" + } else { + errorsNode + .fields() + .asScala + .map { entry => + val opId = entry.getKey + val err = entry.getValue + val message = Option(err.get("message")).map(_.asText()).filter(_.nonEmpty) + val details = Option(err.get("details")).map(_.asText()).filter(_.nonEmpty) + val text = (message ++ details).mkString(" - ") + s"$opId: ${if (text.nonEmpty) text else "compilation error"}" + } + .mkString("; ") + } + } + + private def readBody(stream: java.io.InputStream): String = + if (stream == null) "" + else new String(stream.readAllBytes(), StandardCharsets.UTF_8).trim +} diff --git a/amber/src/main/scala/org/apache/texera/web/service/ExecutionReconfigurationService.scala b/amber/src/main/scala/org/apache/texera/web/service/ExecutionReconfigurationService.scala index e7617fdfe16..2aa0855db86 100644 --- a/amber/src/main/scala/org/apache/texera/web/service/ExecutionReconfigurationService.scala +++ 
b/amber/src/main/scala/org/apache/texera/web/service/ExecutionReconfigurationService.scala @@ -58,22 +58,34 @@ class ExecutionReconfigurationService( def modifyOperatorLogic(modifyLogicRequest: ModifyLogicRequest): TexeraWebSocketEvent = { val newOp = modifyLogicRequest.operator val opId = newOp.operatorIdentifier - val currentOp = workflow.logicalPlan.getOperator(opId) - val reconfiguredPhysicalOp = - currentOp.runtimeReconfiguration( - workflow.context.workflowId, - workflow.context.executionId, - currentOp, - newOp - ) - reconfiguredPhysicalOp match { - case Failure(exception) => ModifyLogicResponse(opId.id, isValid = false, exception.getMessage) - case Success(op) => { - stateStore.reconfigurationStore.updateState(old => - old.copy(unscheduledReconfigurations = old.unscheduledReconfigurations :+ op) + // Reconfiguration derives the new physical op from the original logical op. The logical plan is + // only present when the workflow was compiled in-process; when compilation was offloaded to the + // compiling-service the runtime holds only the physical plan, so reconfiguration is unavailable. + workflow.logicalPlan match { + case None => + ModifyLogicResponse( + opId.id, + isValid = false, + "Operator reconfiguration is unavailable: the logical plan is not retained at runtime." 
) - ModifyLogicResponse(opId.id, isValid = true, "") - } + case Some(logicalPlan) => + val currentOp = logicalPlan.getOperator(opId) + val reconfiguredPhysicalOp = + currentOp.runtimeReconfiguration( + workflow.context.workflowId, + workflow.context.executionId, + currentOp, + newOp + ) + reconfiguredPhysicalOp match { + case Failure(exception) => + ModifyLogicResponse(opId.id, isValid = false, exception.getMessage) + case Success(op) => + stateStore.reconfigurationStore.updateState(old => + old.copy(unscheduledReconfigurations = old.unscheduledReconfigurations :+ op) + ) + ModifyLogicResponse(opId.id, isValid = true, "") + } } } diff --git a/amber/src/main/scala/org/apache/texera/web/service/WorkflowExecutionService.scala b/amber/src/main/scala/org/apache/texera/web/service/WorkflowExecutionService.scala index 741687e02c9..fc3eabc7b98 100644 --- a/amber/src/main/scala/org/apache/texera/web/service/WorkflowExecutionService.scala +++ b/amber/src/main/scala/org/apache/texera/web/service/WorkflowExecutionService.scala @@ -38,7 +38,6 @@ import org.apache.texera.web.resource.dashboard.user.workflow.WorkflowExecutions import org.apache.texera.web.storage.ExecutionStateStore import org.apache.texera.web.storage.ExecutionStateStore.updateWorkflowState import org.apache.texera.web.{ComputingUnitMaster, SubscriptionManager, WebsocketInput} -import org.apache.texera.workflow.WorkflowCompiler import java.net.URI import scala.collection.mutable @@ -104,13 +103,23 @@ class WorkflowExecutionService( var executionConsoleService: ExecutionConsoleService = _ def executeWorkflow(): Unit = { - try { - workflow = new WorkflowCompiler(workflowContext) - .compile(request.logicalPlan) - } catch { - case err: Throwable => - errorHandler(err) - } + // Offload compilation to the workflow-compiling-service over HTTP and run the returned plan. + // The runtime does not need the logical plan, so it is left as None on the Workflow. 
+ val physicalPlan = + try { + CompilingServiceClient.compile( + request.logicalPlan, + workflowContext.workflowId.id, + workflowContext.executionId.id + ) + } catch { + case err: Throwable => + // Compilation failed (e.g. an invalid workflow). Surface the error and stop here — + // continuing would dereference a null `workflow` and mask the real failure with an NPE. + errorHandler(err) + return + } + workflow = Workflow(workflowContext, None, physicalPlan) client = ComputingUnitMaster.createAmberRuntime( workflow.context, diff --git a/amber/src/main/scala/org/apache/texera/web/service/WorkflowService.scala b/amber/src/main/scala/org/apache/texera/web/service/WorkflowService.scala index 809faf6a520..90d4a20edf8 100644 --- a/amber/src/main/scala/org/apache/texera/web/service/WorkflowService.scala +++ b/amber/src/main/scala/org/apache/texera/web/service/WorkflowService.scala @@ -134,7 +134,7 @@ class WorkflowService( (oldState, newState) => { if (oldState.state != COMPLETED && newState.state == COMPLETED) { - lastCompletedLogicalPlan = Option.apply(executionService.workflow.logicalPlan) + lastCompletedLogicalPlan = executionService.workflow.logicalPlan } Iterable.empty } diff --git a/amber/src/main/scala/org/apache/texera/workflow/LogicalLink.scala b/amber/src/main/scala/org/apache/texera/workflow/LogicalLink.scala index e6553e3cdf1..bf284d1d56e 100644 --- a/amber/src/main/scala/org/apache/texera/workflow/LogicalLink.scala +++ b/amber/src/main/scala/org/apache/texera/workflow/LogicalLink.scala @@ -20,9 +20,15 @@ package org.apache.texera.workflow import com.fasterxml.jackson.annotation.{JsonCreator, JsonProperty} +import com.fasterxml.jackson.core.JsonGenerator +import com.fasterxml.jackson.databind.annotation.JsonSerialize +import com.fasterxml.jackson.databind.{JsonSerializer, SerializerProvider} import org.apache.texera.amber.core.virtualidentity.OperatorIdentity import org.apache.texera.amber.core.workflow.PortIdentity +// Serialized by LogicalLinkSerializer, 
which writes fromOpId / toOpId as bare string ids — the same +// shape the @JsonCreator string constructor reads — so a LogicalLink round-trips through JSON. +@JsonSerialize(using = classOf[LogicalLinkSerializer]) case class LogicalLink( @JsonProperty("fromOpId") fromOpId: OperatorIdentity, fromPortId: PortIdentity, @@ -52,3 +58,29 @@ case class LogicalLink( this(OperatorIdentity(fromOpId), fromPortId, OperatorIdentity(toOpId), toPortId) } } + +/** + * Emits `fromOpId` / `toOpId` as bare string ids (not the `{"id": ...}` object form Jackson would + * derive from the `OperatorIdentity` case class), matching the shape the `@JsonCreator` string + * constructor consumes. Without this, `writeValueAsString` produces JSON that the link's own + * deserializer cannot read back. Ports keep their default object serialization. See + * https://github.com/apache/texera/issues/5042. The ComputingUnitMaster -> + * workflow-compiling-service path relies on this round-trip (it re-serializes a logical plan + * and ships it over HTTP). 
+ */ +class LogicalLinkSerializer extends JsonSerializer[LogicalLink] { + override def serialize( + link: LogicalLink, + gen: JsonGenerator, + provider: SerializerProvider + ): Unit = { + gen.writeStartObject() + gen.writeStringField("fromOpId", link.fromOpId.id) + gen.writeFieldName("fromPortId") + provider.defaultSerializeValue(link.fromPortId, gen) + gen.writeStringField("toOpId", link.toOpId.id) + gen.writeFieldName("toPortId") + provider.defaultSerializeValue(link.toPortId, gen) + gen.writeEndObject() + } +} diff --git a/amber/src/main/scala/org/apache/texera/workflow/WorkflowCompiler.scala b/amber/src/main/scala/org/apache/texera/workflow/WorkflowCompiler.scala index b93aa3e4db3..d5bd64ad886 100644 --- a/amber/src/main/scala/org/apache/texera/workflow/WorkflowCompiler.scala +++ b/amber/src/main/scala/org/apache/texera/workflow/WorkflowCompiler.scala @@ -155,6 +155,6 @@ class WorkflowCompiler( outputPortsNeedingStorage = outputPortsNeedingStorage ) - Workflow(context, logicalPlan, physicalPlan) + Workflow(context, Some(logicalPlan), physicalPlan) } } diff --git a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/DataProcessingSpec.scala b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/DataProcessingSpec.scala index d070fefb275..cd9ed4248ea 100644 --- a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/DataProcessingSpec.scala +++ b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/DataProcessingSpec.scala @@ -119,7 +119,7 @@ class DataProcessingSpec client .registerCallback[ExecutionStateUpdate](evt => { if (evt.state == COMPLETED) { - results = workflow.logicalPlan.getTerminalOperatorIds + results = workflow.logicalPlan.get.getTerminalOperatorIds .filter(terminalOpId => { val uri = getResultUriByLogicalPortId( workflowContext.executionId, diff --git a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/TestUtils.scala b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/TestUtils.scala index bcc43b396b1..6be8f15a4be 100644 
--- a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/TestUtils.scala +++ b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/TestUtils.scala @@ -188,7 +188,7 @@ object TestUtils { var result: Map[OperatorIdentity, List[Tuple]] = null client.registerCallback[ExecutionStateUpdate](evt => { if (evt.state == COMPLETED) { - result = workflow.logicalPlan.getTerminalOperatorIds + result = workflow.logicalPlan.get.getTerminalOperatorIds .filter(terminalOpId => { val uri = getResultUriByLogicalPortId( workflow.context.executionId, diff --git a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala index bd56aa7d5f6..fa82c8ef752 100644 --- a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala +++ b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala @@ -20,7 +20,7 @@ package org.apache.texera.workflow import com.fasterxml.jackson.databind.JsonNode -import com.fasterxml.jackson.databind.exc.{MismatchedInputException, ValueInstantiationException} +import com.fasterxml.jackson.databind.exc.ValueInstantiationException import org.apache.texera.amber.core.virtualidentity.OperatorIdentity import org.apache.texera.amber.core.workflow.PortIdentity import org.apache.texera.amber.util.JSONUtils.objectMapper @@ -245,17 +245,13 @@ class LogicalLinkSpec extends AnyFlatSpec { assert(tree.has("toPortId")) } - it should "NOT round-trip through writeValueAsString (the @JsonCreator string overload is incompatible with the object-shape OperatorIdentity that writeValueAsString emits)" in { - // Characterization of a real asymmetry tracked by - // https://github.com/apache/texera/issues/5042. Production reads - // user-saved workflow JSON where `fromOpId`/`toOpId` are plain - // strings, but `objectMapper.writeValueAsString` writes - // OperatorIdentity as `{"id":"op-A"}` (the case-class object form). 
- // Re-reading the emitted JSON fails because Jackson dispatches on the - // @JsonCreator string overload, which can't accept an object for - // fromOpId. When the issue is fixed (additional @JsonCreator object - // overload or a custom @JsonDeserialize), this test must flip to a - // passing round-trip assertion alongside the fix. + it should "round-trip through writeValueAsString (fromOpId / toOpId emitted as bare strings)" in { + // Regression for https://github.com/apache/texera/issues/5042. Previously + // `objectMapper.writeValueAsString` wrote OperatorIdentity as the object `{"id":"op-A"}`, + // which the @JsonCreator string constructor could not re-read. `OperatorIdentityStringSerializer` + // now emits fromOpId / toOpId as bare strings — the same shape that constructor consumes — so a + // LogicalLink survives a writeValueAsString -> readValue round-trip. The ComputingUnitMaster -> + // workflow-compiling-service path relies on this (it re-serializes a logical plan over HTTP). val original = LogicalLink( OperatorIdentity("op-A"), PortIdentity(0), @@ -263,17 +259,14 @@ class LogicalLinkSpec extends AnyFlatSpec { PortIdentity(1) ) val json = objectMapper.writeValueAsString(original) - // Parse the emitted JSON and confirm the structural shape — fromOpId - // is an object with an `id` field of "op-A". Avoids depending on - // exact key ordering or escaping. + // fromOpId / toOpId are emitted as bare strings (not the `{"id": ...}` object form). val tree = objectMapper.readTree(json) - assert(tree.path("fromOpId").isObject, s"expected fromOpId to be an object: $json") - assert(tree.path("fromOpId").path("id").asText() == "op-A") - // Re-reading the just-emitted JSON fails because the @JsonCreator - // String overload can't accept the object-shape fromOpId. 
- intercept[MismatchedInputException] { - objectMapper.readValue(json, classOf[LogicalLink]) - } + assert(tree.path("fromOpId").isTextual, s"expected fromOpId to be a string: $json") + assert(tree.path("fromOpId").asText() == "op-A") + assert(tree.path("toOpId").isTextual, s"expected toOpId to be a string: $json") + assert(tree.path("toOpId").asText() == "op-B") + // Re-reading the just-emitted JSON now succeeds and reproduces the original link. + assert(objectMapper.readValue(json, classOf[LogicalLink]) == original) } it should "reject missing string op-id fields when deserializing via Jackson" in { diff --git a/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanCompiledRoundTripSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanCompiledRoundTripSpec.scala new file mode 100644 index 00000000000..f36936bc445 --- /dev/null +++ b/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanCompiledRoundTripSpec.scala @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.workflow + +import org.apache.texera.amber.core.executor.{OpExecWithClassName, OpExecWithCode} +import org.apache.texera.amber.core.workflow.{PartitionInfo, PhysicalPlan, PortIdentity, UnknownPartition, WorkflowContext} +import org.apache.texera.amber.operator.TestOperators +import org.apache.texera.amber.operator.aggregate.AggregationFunction +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.apache.texera.web.model.websocket.request.LogicalPlanPojo +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +/** + * End-to-end proof that a `PhysicalPlan` produced by the real [[WorkflowCompiler]] (the + * exact code path the engine uses) survives a JSON round-trip and comes back structurally + * and runtime-equivalent. This complements the workflow-core `PhysicalPlanSerializationSpec` + * (which builds a plan by hand) by exercising a compiler-generated plan, including the + * multi-physical-op expansion of an Aggregate and its hash partitioning. + */ +class PhysicalPlanCompiledRoundTripSpec extends AnyFlatSpec with Matchers { + + private def pojo( + operators: List[org.apache.texera.amber.operator.LogicalOp], + links: List[LogicalLink] + ): LogicalPlanPojo = + LogicalPlanPojo(operators, links, List.empty, List.empty) + + /** Compiles CSV scan -> group-by aggregate into a physical plan. 
*/ + private def compiledPlan(): PhysicalPlan = { + val csv = TestOperators.smallCsvScanOpDesc() + val agg = TestOperators.aggregateAndGroupByDesc( + "Units Sold", + AggregationFunction.SUM, + List("Country") + ) + val ctx = new WorkflowContext() + val workflow = new WorkflowCompiler(ctx).compile( + pojo( + List(csv, agg), + List( + LogicalLink(csv.operatorIdentifier, PortIdentity(), agg.operatorIdentifier, PortIdentity()) + ) + ) + ) + workflow.physicalPlan + } + + private def roundTrip(plan: PhysicalPlan): PhysicalPlan = { + val json = objectMapper.writeValueAsString(plan) + objectMapper.readValue(json, classOf[PhysicalPlan]) + } + + "A compiler-produced PhysicalPlan" should "round-trip its operator id set and links" in { + val plan = compiledPlan() + val back = roundTrip(plan) + back.operators.map(_.id) shouldBe plan.operators.map(_.id) + back.links shouldBe plan.links + } + + it should "round-trip every operator's opExecInitInfo, partition spec, requirement, and location" in { + val plan = compiledPlan() + val back = roundTrip(plan) + + val sampleInputs = List(UnknownPartition(): PartitionInfo) + + plan.operators.foreach { orig => + val restored = back.getOperator(orig.id) + + // opExecInitInfo: the runtime-critical executor descriptor must be identical, and a + // className/code value must survive as the same concrete subtype the engine builds from. 
+ restored.opExecInitInfo shouldBe orig.opExecInitInfo + orig.opExecInitInfo match { + case _: OpExecWithClassName => restored.opExecInitInfo shouldBe a[OpExecWithClassName] + case _: OpExecWithCode => restored.opExecInitInfo shouldBe a[OpExecWithCode] + case _ => // OpExecSource / Empty: equality assertion above suffices + } + + restored.partitionRequirement shouldBe orig.partitionRequirement + restored.partitionDeriveSpec shouldBe orig.partitionDeriveSpec + restored.locationPreference shouldBe orig.locationPreference + + // the rebuilt derivePartition function reproduces the original output + val padded = orig.inputPorts.keys.toList.indices.map(_ => UnknownPartition(): PartitionInfo).toList + val inputs = if (padded.isEmpty) sampleInputs else padded + restored.derivePartition(inputs) shouldBe orig.derivePartition(inputs) + } + } + + it should "round-trip per-port output schemas and rehydrate per-port links" in { + val plan = compiledPlan() + val back = roundTrip(plan) + + plan.operators.foreach { orig => + val restored = back.getOperator(orig.id) + + // per-port output schemas + orig.outputPorts.foreach { + case (portId, (_, _, schemaEither)) => + restored.outputPorts(portId)._3.toOption shouldBe schemaEither.toOption + } + + // per-port input/output link lists rehydrated from plan.links + restored.getInputLinks() should contain theSameElementsAs orig.getInputLinks() + orig.outputPorts.keys.foreach { portId => + restored.getOutputLinks(portId) should contain theSameElementsAs orig.getOutputLinks(portId) + } + } + } + + it should "produce an opExecInitInfo that still pattern-matches as a runnable descriptor" in { + val plan = compiledPlan() + val back = roundTrip(plan) + // Mirror the SerializationManager match: every op's restored opExecInitInfo must be a + // kind the engine knows how to instantiate an executor from. 
+ back.operators.foreach { op => + val runnable = op.opExecInitInfo match { + case OpExecWithClassName(className, _) => className.nonEmpty + case OpExecWithCode(code, _) => code.nonEmpty + case other => fail(s"unexpected opExecInitInfo after round-trip: $other") + } + assert(runnable, s"op ${op.id} lost its executor descriptor") + } + } + + "A compiler-produced PhysicalPlan round-trip" should "reject malformed JSON" in { + assertThrows[Exception] { + objectMapper.readValue("{ not valid ", classOf[PhysicalPlan]) + } + } +} diff --git a/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanRoundTripThoroughSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanRoundTripThoroughSpec.scala new file mode 100644 index 00000000000..6ae35f071ea --- /dev/null +++ b/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanRoundTripThoroughSpec.scala @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.workflow + +import org.apache.texera.amber.core.executor.{OpExecWithClassName, OpExecWithCode} +import org.apache.texera.amber.core.workflow.{ + HashPartition, + PartitionInfo, + PhysicalPlan, + PortIdentity, + ToHash, + ToUnknown, + UnknownPartition, + WorkflowContext +} +import org.apache.texera.amber.operator.TestOperators +import org.apache.texera.amber.operator.aggregate.AggregationFunction +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.apache.texera.web.model.websocket.request.LogicalPlanPojo +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +/** + * Thorough round-trip coverage over a VARIETY of real, compiler-produced physical plans — + * hash join (`ToHash` on the probe key, two inputs, partition requirement), Python UDF + * (`OpExecWithCode`, `ToUnknown`), a keyword-search filter (one-to-one / `Passthrough`), + * and a multi-operator chain. For each it asserts full structural + runtime equivalence + * after a JSON round-trip, and that a second round-trip is a fixed point (stable). + * + * This complements the single-workflow `PhysicalPlanCompiledRoundTripSpec` and the + * hand-built `PhysicalPlanSerializationSpec`. + */ +class PhysicalPlanRoundTripThoroughSpec extends AnyFlatSpec with Matchers { + + private def pojo( + operators: List[org.apache.texera.amber.operator.LogicalOp], + links: List[LogicalLink] + ): LogicalPlanPojo = LogicalPlanPojo(operators, links, List.empty, List.empty) + + private def compile( + operators: List[org.apache.texera.amber.operator.LogicalOp], + links: List[LogicalLink] + ): PhysicalPlan = + new WorkflowCompiler(new WorkflowContext()).compile(pojo(operators, links)).physicalPlan + + private def roundTrip(plan: PhysicalPlan): PhysicalPlan = + objectMapper.readValue(objectMapper.writeValueAsString(plan), classOf[PhysicalPlan]) + + /** Deep equivalence of every serialized + runtime-critical field, plus rehydrated links. 
*/ + private def assertEquivalent(orig: PhysicalPlan, back: PhysicalPlan): Unit = { + back.operators.map(_.id) shouldBe orig.operators.map(_.id) + back.links shouldBe orig.links + + orig.operators.foreach { o => + val r = back.getOperator(o.id) + withClue(s"operator ${o.id}: ") { + r.opExecInitInfo shouldBe o.opExecInitInfo + r.partitionDeriveSpec shouldBe o.partitionDeriveSpec + r.partitionRequirement shouldBe o.partitionRequirement + r.locationPreference shouldBe o.locationPreference + r.parallelizable shouldBe o.parallelizable + r.isOneToManyOp shouldBe o.isOneToManyOp + r.suggestedWorkerNum shouldBe o.suggestedWorkerNum + r.pveName shouldBe o.pveName + + o.outputPorts.foreach { + case (pid, (_, _, schema)) => + r.outputPorts(pid)._3.toOption shouldBe schema.toOption + } + o.inputPorts.foreach { + case (pid, (_, _, schema)) => + r.inputPorts(pid)._3.toOption shouldBe schema.toOption + } + + // the reconstructed derivePartition reproduces the original output exactly + val inputs = + o.inputPorts.keys.toList.indices.map(_ => UnknownPartition(): PartitionInfo).toList + val sample = if (inputs.isEmpty) List(UnknownPartition(): PartitionInfo) else inputs + r.derivePartition(sample) shouldBe o.derivePartition(sample) + + // per-port links rehydrated from plan.links + r.getInputLinks() should contain theSameElementsAs o.getInputLinks() + o.outputPorts.keys.foreach { pid => + r.getOutputLinks(pid) should contain theSameElementsAs o.getOutputLinks(pid) + } + } + } + } + + /** A second round-trip must be a fixed point. 
*/ + private def assertStable(plan: PhysicalPlan): Unit = { + val once = roundTrip(plan) + assertEquivalent(once, roundTrip(once)) + } + + "A hash-join plan" should "round-trip ToHash(probe key), the partition requirement, and two inputs" in { + val build = TestOperators.smallCsvScanOpDesc() + val probe = TestOperators.smallCsvScanOpDesc() + val join = TestOperators.joinOpDesc("Country", "Country") + val plan = compile( + List(build, probe, join), + List( + LogicalLink(build.operatorIdentifier, PortIdentity(), join.operatorIdentifier, PortIdentity()), + LogicalLink(probe.operatorIdentifier, PortIdentity(), join.operatorIdentifier, PortIdentity(1)) + ) + ) + val back = roundTrip(plan) + assertEquivalent(plan, back) + + // A hash join expands into several physical ops; its hash distribution lives in a + // ToHash derivePartition and/or a HashPartition requirement on some of them. Assert + // that hash distribution exists (so this is a real hash-join fixture) — the round-trip + // of every field on every op is already asserted by assertEquivalent above. 
+ val joinOps = plan.getPhysicalOpsOfLogicalOp(join.operatorIdentifier) + joinOps should not be empty + joinOps.exists(jo => + jo.partitionDeriveSpec.isInstanceOf[ToHash] || + jo.partitionRequirement.flatten.exists(_.isInstanceOf[HashPartition]) + ) shouldBe true + assertStable(plan) + } + + "A Python-UDF plan" should "round-trip OpExecWithCode and ToUnknown" in { + val csv = TestOperators.smallCsvScanOpDesc() + val python = TestOperators.pythonOpDesc() + val plan = compile( + List(csv, python), + List( + LogicalLink(csv.operatorIdentifier, PortIdentity(), python.operatorIdentifier, PortIdentity()) + ) + ) + val back = roundTrip(plan) + assertEquivalent(plan, back) + + val pyOps = plan.getPhysicalOpsOfLogicalOp(python.operatorIdentifier) + pyOps should not be empty + pyOps.foreach { p => + p.opExecInitInfo shouldBe a[OpExecWithCode] + p.partitionDeriveSpec shouldBe ToUnknown() + val rb = back.getOperator(p.id) + rb.opExecInitInfo shouldBe a[OpExecWithCode] + rb.opExecInitInfo shouldBe p.opExecInitInfo + rb.partitionDeriveSpec shouldBe ToUnknown() + } + } + + "A filter (keyword-search) plan" should "round-trip a one-to-one operator" in { + val csv = TestOperators.smallCsvScanOpDesc() + val filter = TestOperators.keywordSearchOpDesc("Country", "United States") + val plan = compile( + List(csv, filter), + List( + LogicalLink(csv.operatorIdentifier, PortIdentity(), filter.operatorIdentifier, PortIdentity()) + ) + ) + assertEquivalent(plan, roundTrip(plan)) + // the scan source keeps its class-name executor descriptor + plan + .getPhysicalOpsOfLogicalOp(csv.operatorIdentifier) + .foreach(_.opExecInitInfo shouldBe a[OpExecWithClassName]) + assertStable(plan) + } + + "A multi-operator chain (scan -> filter -> group-by aggregate)" should "round-trip end to end" in { + val csv = TestOperators.smallCsvScanOpDesc() + val filter = TestOperators.keywordSearchOpDesc("Country", "United States") + val agg = + TestOperators.aggregateAndGroupByDesc("Units Sold", 
AggregationFunction.SUM, List("Country")) + val plan = compile( + List(csv, filter, agg), + List( + LogicalLink(csv.operatorIdentifier, PortIdentity(), filter.operatorIdentifier, PortIdentity()), + LogicalLink(filter.operatorIdentifier, PortIdentity(), agg.operatorIdentifier, PortIdentity()) + ) + ) + assertEquivalent(plan, roundTrip(plan)) + assertStable(plan) + } +} diff --git a/amber/src/test/scala/org/apache/texera/workflow/WorkflowCompilerSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/WorkflowCompilerSpec.scala index 94a8ffce552..c96d5b14a89 100644 --- a/amber/src/test/scala/org/apache/texera/workflow/WorkflowCompilerSpec.scala +++ b/amber/src/test/scala/org/apache/texera/workflow/WorkflowCompilerSpec.scala @@ -68,7 +68,7 @@ class WorkflowCompilerSpec extends AnyFlatSpec { ) ) - assert(workflow.logicalPlan.operators.size == 2) + assert(workflow.logicalPlan.get.operators.size == 2) assert(workflow.physicalPlan.getPhysicalOpsOfLogicalOp(csv.operatorIdentifier).nonEmpty) assert(workflow.physicalPlan.getPhysicalOpsOfLogicalOp(keyword.operatorIdentifier).nonEmpty) } diff --git a/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala b/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala index b3a57515919..c6b8090a2de 100644 --- a/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala +++ b/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala @@ -46,6 +46,10 @@ object EnvironmentalVariable { // computing unit can persist/read execution metadata without holding Postgres credentials (#5011). val ENV_DASHBOARD_SERVICE_EXECUTION_METADATA_ENDPOINT = "DASHBOARD_SERVICE_EXECUTION_METADATA_ENDPOINT" + + // Endpoint of the workflow-compiling-service's /compile, so a computing unit can compile a + // workflow (logical plan -> physical plan) over HTTP instead of running the compiler in-process. 
+ val ENV_WORKFLOW_COMPILING_SERVICE_ENDPOINT = "WORKFLOW_COMPILING_SERVICE_ENDPOINT" // When "true", the computing unit routes execution-metadata operations over HTTP instead of JDBC. val ENV_EXECUTION_METADATA_REMOTE = "EXECUTION_METADATA_REMOTE" diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/DerivePartitionSpec.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/DerivePartitionSpec.scala new file mode 100644 index 00000000000..8ecdf119265 --- /dev/null +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/DerivePartitionSpec.scala @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.core.workflow + +import com.fasterxml.jackson.annotation.JsonSubTypes.Type +import com.fasterxml.jackson.annotation.{JsonSubTypes, JsonTypeInfo} + +/** + * A serializable description of how a [[PhysicalOp]] derives its output partition + * from its input partitions. + * + * The runtime previously stored this as a function closure + * (`List[PartitionInfo] => PartitionInfo`) directly on `PhysicalOp`, which made the + * `PhysicalOp` impossible to serialize to JSON. 
This ADT captures everything the + * function needs as plain, Jackson-serializable data; the actual function is rebuilt + * lazily via [[toFunction]] after deserialization, reproducing the original behavior + * exactly. + */ +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type") +@JsonSubTypes( + Array( + new Type(value = classOf[Passthrough], name = "passthrough"), + new Type(value = classOf[ToSingle], name = "toSingle"), + new Type(value = classOf[ToHash], name = "toHash"), + new Type(value = classOf[ToUnknown], name = "toUnknown"), + new Type(value = classOf[ProjectionPartition], name = "projection") + ) +) +sealed trait DerivePartitionSpec { + + /** + * Rebuilds the partition-derivation function described by this spec. + */ + def toFunction: List[PartitionInfo] => PartitionInfo +} + +/** + * Default behavior: the output partition is the same as the (first) input partition. + * Matches the historical default closure `inputParts => inputParts.head`. + */ +final case class Passthrough() extends DerivePartitionSpec { + override def toFunction: List[PartitionInfo] => PartitionInfo = inputParts => inputParts.head +} + +/** + * Always produces a [[SinglePartition]] regardless of the inputs. + * Used by `manyToOnePhysicalOp`. + */ +final case class ToSingle() extends DerivePartitionSpec { + override def toFunction: List[PartitionInfo] => PartitionInfo = _ => SinglePartition() +} + +/** + * Always produces a [[HashPartition]] on the given attribute names (empty means all + * attributes). Used by Aggregate (group-by keys), HashJoin (probe attribute), + * Intersect, Distinct, Difference, and SymmetricDifference. + */ +final case class ToHash(hashAttributeNames: List[String] = List.empty) extends DerivePartitionSpec { + override def toFunction: List[PartitionInfo] => PartitionInfo = + _ => HashPartition(hashAttributeNames) +} + +/** + * Always produces an [[UnknownPartition]] regardless of the inputs. 
+ * Used by Python / Java / R UDF operators. + */ +final case class ToUnknown() extends DerivePartitionSpec { + override def toFunction: List[PartitionInfo] => PartitionInfo = _ => UnknownPartition() +} + +/** + * Reproduces the partition-derivation logic of `ProjectionOpDesc`. + * + * The original closure inspects only the incoming partition and re-emits it, + * collapsing a hash/range partition to [[UnknownPartition]] when its attribute-name + * list is empty. It does not depend on any operator-specific descriptor state, so the + * spec carries no fields and reproduces the function exactly. + */ +final case class ProjectionPartition() extends DerivePartitionSpec { + override def toFunction: List[PartitionInfo] => PartitionInfo = + partition => { + val inputPartitionInfo = partition.head + inputPartitionInfo match { + case HashPartition(hashAttributeNames) => + if (hashAttributeNames.nonEmpty) HashPartition(hashAttributeNames) + else UnknownPartition() + case RangePartition(rangeAttributeNames, min, max) => + if (rangeAttributeNames.nonEmpty) RangePartition(rangeAttributeNames, min, max) + else UnknownPartition() + case _ => inputPartitionInfo + } + } +} diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/LocationPreference.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/LocationPreference.scala index a8308d3f3fd..9ef4301e044 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/LocationPreference.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/LocationPreference.scala @@ -19,15 +19,30 @@ package org.apache.texera.amber.core.workflow +import com.fasterxml.jackson.databind.annotation.{JsonDeserialize, JsonSerialize} +import org.apache.texera.amber.util.serde.{ + LocationPreferenceDeserializer, + LocationPreferenceSerializer +} + // LocationPreference defines where operators should run. 
+// +// The two concrete preferences are Scala `case object`s (singletons), so they cannot +// carry a `@JsonTypeInfo` discriminator the way ordinary case classes do. Jackson +// (de)serialization is provided by a dedicated (de)serializer registered on the trait, +// which emits/parses a single `{"type": ...}` discriminator and always returns the +// canonical singleton instance. This keeps `eq` identity and pattern matching +// (`case PreferController =>`) working unchanged across a serialization round-trip. +@JsonSerialize(using = classOf[LocationPreferenceSerializer]) +@JsonDeserialize(using = classOf[LocationPreferenceDeserializer]) sealed trait LocationPreference extends Serializable // PreferController: Run on the controller node. // Example: For scan operators reading files. -object PreferController extends LocationPreference +case object PreferController extends LocationPreference // RoundRobinPreference: Distribute across worker nodes, per operator. // Example: // - Operator A: Worker 1 -> Node 1, Worker 2 -> Node 2, Worker 3 -> Node 3 // - Operator B: Worker 1 -> Node 1, Worker 2 -> Node 2 -object RoundRobinPreference extends LocationPreference +case object RoundRobinPreference extends LocationPreference diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/PhysicalOp.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/PhysicalOp.scala index 44125045c97..7f019be06c4 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/PhysicalOp.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/PhysicalOp.scala @@ -19,8 +19,10 @@ package org.apache.texera.amber.core.workflow -import com.fasterxml.jackson.annotation.{JsonIgnore, JsonIgnoreProperties} +import com.fasterxml.jackson.annotation.JsonIgnore +import com.fasterxml.jackson.databind.annotation.{JsonDeserialize, JsonSerialize} import com.typesafe.scalalogging.LazyLogging +import 
org.apache.texera.amber.util.serde.{PhysicalOpDeserializer, PhysicalOpSerializer} import org.apache.texera.amber.core.executor.{OpExecInitInfo, OpExecWithCode} import org.apache.texera.amber.core.tuple.Schema import org.apache.texera.amber.core.virtualidentity.{ @@ -130,7 +132,7 @@ object PhysicalOp { opExecInitInfo, parallelizable = false, partitionRequirement = List(Option(SinglePartition())), - derivePartition = _ => SinglePartition() + partitionDeriveSpec = ToSingle() ) } @@ -156,21 +158,86 @@ object PhysicalOp { manyToOnePhysicalOp(physicalOpId, workflowId, executionId, opExecInitInfo) .withLocationPreference(Some(PreferController)) } + + /** + * Rebuilds a [[PhysicalOp]] from the data produced by the serializable + * `inputPortsSerialized` / `outputPortsSerialized` views. + * + * The runtime `inputPorts` / `outputPorts` maps carry per-port link lists and an + * `Either[Throwable, Schema]` that are not directly serializable, so they are emitted + * as slimmed-down views (dropping links, mapping the `Either` to an `Option[Schema]`). + * Here the real maps are rebuilt with EMPTY link lists; the per-port link lists are + * then rehydrated at the [[PhysicalPlan]] level by replaying `links`. + * + * This is invoked by the custom `PhysicalOpDeserializer` (registered on + * `JSONUtils.objectMapper`) rather than via a `@JsonCreator`, because + * jackson-module-scala binds case classes to their primary constructor and does not + * reliably honor a companion-object creator. 
+ */ + def fromSerialized( + id: PhysicalOpIdentity, + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity, + opExecInitInfo: OpExecInitInfo, + parallelizable: Boolean, + locationPreference: Option[LocationPreference], + partitionRequirement: List[Option[PartitionInfo]], + partitionDeriveSpec: DerivePartitionSpec, + inputPortsSerialized: Map[PortIdentity, (InputPort, Option[Schema])], + outputPortsSerialized: Map[PortIdentity, (OutputPort, Option[Schema])], + isOneToManyOp: Boolean, + suggestedWorkerNum: Option[Int], + pveName: String + ): PhysicalOp = { + def schemaEither(schemaOpt: Option[Schema]): Either[Throwable, Schema] = + schemaOpt match { + case Some(schema) => Right(schema) + case None => Left(new SchemaNotAvailableException("schema is not available")) + } + + val rebuiltInputPorts = inputPortsSerialized.map { + case (portId, (port, schemaOpt)) => + portId -> ((port, List.empty[PhysicalLink], schemaEither(schemaOpt))) + } + val rebuiltOutputPorts = outputPortsSerialized.map { + case (portId, (port, schemaOpt)) => + portId -> ((port, List.empty[PhysicalLink], schemaEither(schemaOpt))) + } + + PhysicalOp( + id = id, + workflowId = workflowId, + executionId = executionId, + opExecInitInfo = opExecInitInfo, + parallelizable = parallelizable, + locationPreference = locationPreference, + partitionRequirement = partitionRequirement, + partitionDeriveSpec = partitionDeriveSpec, + inputPorts = rebuiltInputPorts, + outputPorts = rebuiltOutputPorts, + isOneToManyOp = isOneToManyOp, + suggestedWorkerNum = suggestedWorkerNum, + pveName = pveName + ) + } } -// In Scala case classes, @JsonIgnore on constructor parameters is not recognized by Jackson. -// Use @JsonIgnoreProperties at the class level instead. 
-@JsonIgnoreProperties( - Array( - "opExecInitInfo", // function type, ignore it - "derivePartition", // function type, ignore it - "inputPorts", // may contain very long stacktrace, ignore it - "outputPorts", // same reason with above - "propagateSchema", // function type, so ignore it - "locationPreference", // ignore it for the deserialization - "partitionRequirement" // ignore it for deserialization - ) -) +// JSON (de)serialization of PhysicalOp is fully delegated to a dedicated +// serializer/deserializer pair, because several fields cannot go through the default +// jackson-module-scala case-class binding: +// - `inputPorts` / `outputPorts` hold per-port link lists and an +// `Either[Throwable, Schema]`; they are emitted as the slimmed-down +// `inputPortsSerialized` / `outputPortsSerialized` views (links dropped, `Either` +// collapsed to `Option[Schema]`) and the link lists are rehydrated at the +// `PhysicalPlan` level by replaying `links`. +// - `derivePartition` / `propagateSchema` are functions: `derivePartition` is rebuilt +// lazily from the serializable `partitionDeriveSpec`, and `propagateSchema` falls back +// to its identity default on deserialize (it is only consulted at compile time). +// - `partitionRequirement` (`List[Option[PartitionInfo]]`) needs explicit handling so the +// polymorphic `PartitionInfo` type discriminator survives the `Option` wrapper. +// See `PhysicalOpSerializer` / `PhysicalOpDeserializer`. 
+@JsonSerialize(using = classOf[PhysicalOpSerializer]) +@JsonDeserialize(using = classOf[PhysicalOpDeserializer]) case class PhysicalOp( // the identifier of this PhysicalOp id: PhysicalOpIdentity, @@ -186,9 +253,10 @@ case class PhysicalOp( locationPreference: Option[LocationPreference] = None, // requirement of partition policy (hash/range/single/none) on inputs partitionRequirement: List[Option[PartitionInfo]] = List(), - // derive the output partition info given the input partitions - // if not specified, by default the output partition is the same as input partition - derivePartition: List[PartitionInfo] => PartitionInfo = inputParts => inputParts.head, + // serializable description of how the output partition is derived from the input + // partitions. If not specified, by default the output partition is the same as the + // (first) input partition (see [[Passthrough]]). + partitionDeriveSpec: DerivePartitionSpec = Passthrough(), // input/output ports of the physical operator // for operators with multiple input/output ports: must set these variables properly inputPorts: Map[PortIdentity, (InputPort, List[PhysicalLink], Either[Throwable, Schema])] = @@ -204,6 +272,32 @@ case class PhysicalOp( pveName: String = "" ) extends LazyLogging { + // derive the output partition info given the input partitions. Rebuilt lazily from the + // serializable `partitionDeriveSpec` so that it survives a JSON round-trip. + @JsonIgnore lazy val derivePartition: List[PartitionInfo] => PartitionInfo = + partitionDeriveSpec.toFunction + + /** + * Serializable view of [[inputPorts]] used by [[PhysicalOpSerializer]] for JSON output: + * the per-port link lists are dropped (rehydrated at the [[PhysicalPlan]] level by + * replaying links) and the `Either[Throwable, Schema]` is collapsed to an + * `Option[Schema]`. 
+ */ + @JsonIgnore + def inputPortsSerialized: Map[PortIdentity, (InputPort, Option[Schema])] = + inputPorts.map { + case (portId, (port, _, schema)) => portId -> ((port, schema.toOption)) + } + + /** + * Serializable view of [[outputPorts]]; see [[inputPortsSerialized]]. + */ + @JsonIgnore + def outputPortsSerialized: Map[PortIdentity, (OutputPort, Option[Schema])] = + outputPorts.map { + case (portId, (port, _, schema)) => portId -> ((port, schema.toOption)) + } + // all the "dependee" links are also blocking lazy val dependeeInputs: List[PortIdentity] = inputPorts.values @@ -298,10 +392,11 @@ case class PhysicalOp( } /** - * creates a copy with the partition info derive function + * creates a copy with the partition-derivation spec. The runtime `derivePartition` + * function is rebuilt lazily from this spec. */ - def withDerivePartition(derivePartition: List[PartitionInfo] => PartitionInfo): PhysicalOp = { - this.copy(derivePartition = derivePartition) + def withDerivePartition(partitionDeriveSpec: DerivePartitionSpec): PhysicalOp = { + this.copy(partitionDeriveSpec = partitionDeriveSpec) } /** diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/PhysicalPlan.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/PhysicalPlan.scala index 5a2a2a61b28..52f3656113d 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/PhysicalPlan.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/workflow/PhysicalPlan.scala @@ -20,8 +20,10 @@ package org.apache.texera.amber.core.workflow import com.fasterxml.jackson.annotation.JsonIgnore +import com.fasterxml.jackson.databind.annotation.JsonDeserialize import com.typesafe.scalalogging.LazyLogging import org.apache.texera.amber.core.tuple.Schema +import org.apache.texera.amber.util.serde.PhysicalPlanDeserializer import org.apache.texera.amber.core.virtualidentity.{ ActorVirtualIdentity, OperatorIdentity, @@ -36,6 
+38,11 @@ import org.jgrapht.util.SupplierUtil import scala.jdk.CollectionConverters.{CollectionHasAsScala, IteratorHasAsScala} +// Deserialization is delegated to PhysicalPlanDeserializer: operators are reconstructed +// with empty per-port link lists (links are dropped from the per-port serialized views), +// then the per-port link lists are rehydrated by replaying `links`. Serialization uses the +// default representation (each operator via its own PhysicalOpSerializer). +@JsonDeserialize(using = classOf[PhysicalPlanDeserializer]) case class PhysicalPlan( operators: Set[PhysicalOp], links: Set[PhysicalLink] diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/JSONUtils.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/JSONUtils.scala index bcc72914892..c300bc6d199 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/JSONUtils.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/JSONUtils.scala @@ -20,12 +20,9 @@ package org.apache.texera.amber.util import com.fasterxml.jackson.annotation.JsonInclude.Include -import com.fasterxml.jackson.databind.module.SimpleModule import com.fasterxml.jackson.databind.{JsonNode, ObjectMapper} import com.fasterxml.jackson.module.noctordeser.NoCtorDeserModule import com.fasterxml.jackson.module.scala.DefaultScalaModule -import org.apache.texera.amber.core.workflow.PortIdentity -import org.apache.texera.amber.util.serde.{PortIdentityKeyDeserializer, PortIdentityKeySerializer} import java.text.SimpleDateFormat import scala.jdk.CollectionConverters.IteratorHasAsScala @@ -55,11 +52,7 @@ object JSONUtils { final val objectMapper = new ObjectMapper() .registerModule(DefaultScalaModule) .registerModule(new NoCtorDeserModule()) - .registerModule( - new SimpleModule() - .addKeySerializer(classOf[PortIdentity], new PortIdentityKeySerializer()) - .addKeyDeserializer(classOf[PortIdentity], new PortIdentityKeyDeserializer()) - ) + 
.registerModule(PhysicalPlanSerdeModule.physicalPlanModule) .setSerializationInclusion(Include.NON_NULL) .setSerializationInclusion(Include.NON_ABSENT) .setDateFormat(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")) diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/PhysicalPlanSerdeModule.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/PhysicalPlanSerdeModule.scala new file mode 100644 index 00000000000..a55356cc3f3 --- /dev/null +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/PhysicalPlanSerdeModule.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.amber.util + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.databind.module.SimpleModule +import org.apache.texera.amber.core.executor.OpExecInitInfo +import org.apache.texera.amber.core.workflow.OutputPort.OutputMode +import org.apache.texera.amber.core.workflow.PortIdentity +import org.apache.texera.amber.util.serde.{ + OpExecInitInfoDeserializer, + OpExecInitInfoSerializer, + OutputModeDeserializer, + OutputModeSerializer, + PortIdentityKeyDeserializer, + PortIdentityKeySerializer +} + +/** + * Reusable Jackson module that teaches an [[ObjectMapper]] how to serialize and deserialize a + * [[org.apache.texera.amber.core.workflow.PhysicalPlan]] / [[org.apache.texera.amber.core.workflow.PhysicalOp]]. + * + * `PhysicalOp` carries a few values that Jackson cannot handle out of the box: + * - `PortIdentity` used as a map key (serialized as a string "id_internal"), + * - the scalapb sealed-oneof `OpExecInitInfo` (tagged by a `kind` discriminator), + * - the scalapb enum `OutputPort.OutputMode` (serialized as its integer wire value). + * + * The registration lives here (rather than inline in [[JSONUtils]]) so that any other process which + * needs to round-trip a `PhysicalPlan` over JSON — notably the workflow-compiling-service's + * Dropwizard object mapper, which only ships `DefaultScalaModule` — can register the exact same + * serializers and stay byte-for-byte compatible with [[JSONUtils.objectMapper]]. + */ +object PhysicalPlanSerdeModule { + + /** Builds a fresh [[SimpleModule]] with the PhysicalPlan serializers/deserializers. 
*/ + def physicalPlanModule: SimpleModule = + new SimpleModule() + .addKeySerializer(classOf[PortIdentity], new PortIdentityKeySerializer()) + .addKeyDeserializer(classOf[PortIdentity], new PortIdentityKeyDeserializer()) + .addSerializer(classOf[OpExecInitInfo], new OpExecInitInfoSerializer()) + .addDeserializer(classOf[OpExecInitInfo], new OpExecInitInfoDeserializer()) + .addSerializer(classOf[OutputMode], new OutputModeSerializer()) + .addDeserializer(classOf[OutputMode], new OutputModeDeserializer()) + + /** + * Registers [[physicalPlanModule]] on the given mapper and returns it (same instance), so calls + * can be chained alongside other `registerModule` invocations. + */ + def register(mapper: ObjectMapper): mapper.type = { + mapper.registerModule(physicalPlanModule) + mapper + } +} diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/LocationPreferenceSerde.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/LocationPreferenceSerde.scala new file mode 100644 index 00000000000..745aae9aa1a --- /dev/null +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/LocationPreferenceSerde.scala @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.util.serde
+
+import com.fasterxml.jackson.core.{JsonGenerator, JsonParser}
+import com.fasterxml.jackson.databind.{
+  DeserializationContext,
+  JsonDeserializer,
+  JsonNode,
+  JsonSerializer,
+  SerializerProvider
+}
+import org.apache.texera.amber.core.workflow.{
+  LocationPreference,
+  PreferController,
+  RoundRobinPreference
+}
+
+/**
+  * Discriminator values used in the JSON representation of [[LocationPreference]].
+  * Kept in one place so the serializer and deserializer cannot drift apart.
+  */
+private object LocationPreferenceSerde {
+  // Value of the "type" field written for `PreferController`.
+  val PreferControllerType = "preferController"
+  // Value of the "type" field written for `RoundRobinPreference`.
+  val RoundRobinType = "roundRobin"
+}
+
+/**
+  * Serializes a [[LocationPreference]] singleton as `{"type": "<discriminator>"}`, where
+  * `<discriminator>` is one of the constants in `LocationPreferenceSerde`
+  * (`"preferController"` or `"roundRobin"`).
+  */
+class LocationPreferenceSerializer extends JsonSerializer[LocationPreference] {
+  override def serialize(
+      value: LocationPreference,
+      gen: JsonGenerator,
+      serializers: SerializerProvider
+  ): Unit = {
+    // Resolve the discriminator before anything is written to the generator.
+    val typeName = value match {
+      case PreferController => LocationPreferenceSerde.PreferControllerType
+      case RoundRobinPreference => LocationPreferenceSerde.RoundRobinType
+    }
+    gen.writeStartObject()
+    gen.writeStringField("type", typeName)
+    gen.writeEndObject()
+  }
+}
+
+/**
+  * Deserializes a [[LocationPreference]] from `{"type": "<discriminator>"}`, always
+  * returning the canonical singleton instance so that `eq` identity and pattern
+  * matching keep working after a round-trip.
+  */
+class LocationPreferenceDeserializer extends JsonDeserializer[LocationPreference] {
+  override def deserialize(
+      p: JsonParser,
+      ctxt: DeserializationContext
+  ): LocationPreference = {
+    val tree: JsonNode = p.getCodec.readTree(p)
+    // `get` returns null when the field is absent; lift that into an Option so the
+    // missing-field and unknown-value cases are handled by a single match.
+    Option(tree.get("type")).map(_.asText()) match {
+      case Some(LocationPreferenceSerde.PreferControllerType) => PreferController
+      case Some(LocationPreferenceSerde.RoundRobinType)       => RoundRobinPreference
+      case Some(other) =>
+        throw new IllegalArgumentException(s"Unknown LocationPreference type: $other")
+      case None =>
+        throw new IllegalArgumentException(
+          s"Cannot deserialize LocationPreference: missing 'type' field in $tree"
+        )
+    }
+  }
+}
diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/OpExecInitInfoSerde.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/OpExecInitInfoSerde.scala
new file mode 100644
index 00000000000..33d5a1c1df7
--- /dev/null
+++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/OpExecInitInfoSerde.scala
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.util.serde
+
+import com.fasterxml.jackson.core.{JsonGenerator, JsonParser}
+import com.fasterxml.jackson.databind.{
+  DeserializationContext,
+  JsonDeserializer,
+  JsonNode,
+  JsonSerializer,
+  SerializerProvider
+}
+import org.apache.texera.amber.core.executor.{
+  OpExecInitInfo,
+  OpExecSource,
+  OpExecWithClassName,
+  OpExecWithCode
+}
+import org.apache.texera.amber.core.virtualidentity.WorkflowIdentity
+
+/**
+  * JSON field names and `kind` discriminator values used by [[OpExecInitInfoSerializer]] /
+  * [[OpExecInitInfoDeserializer]].
+  * Centralized so the two halves cannot drift apart.
+  */
+private object OpExecInitInfoSerde {
+  // Name of the discriminator field, followed by the values it can take.
+  val KindField = "kind"
+  val ClassNameKind = "className"
+  val CodeKind = "code"
+  val SourceKind = "source"
+  val EmptyKind = "empty"
+
+  // Names of the payload fields written next to the discriminator.
+  val ClassNameField = "className"
+  val DescStringField = "descString"
+  val CodeField = "code"
+  val LanguageField = "language"
+  val StorageKeyField = "storageKey"
+  val WorkflowIdentityField = "workflowIdentity"
+}
+
+/**
+  * Serializes the scalapb sealed-oneof [[OpExecInitInfo]] to a tagged JSON object.
+  *
+  * `OpExecInitInfo` is a scalapb `GeneratedSealedOneof`; Jackson cannot introspect its
+  * three concrete subtypes (`OpExecWithClassName`, `OpExecWithCode`, `OpExecSource`)
+  * via `@JsonTypeInfo`. This serializer maps each subtype to an explicit
+  * `{"kind": ...}` discriminator plus its scalar fields. `OpExecSource.workflowIdentity`
+  * is a nested scalapb message and is delegated to the surrounding codec.
+  *
+  * `OpExecInitInfo.Empty` is written as an object that contains only the `kind` field.
+  */
+class OpExecInitInfoSerializer extends JsonSerializer[OpExecInitInfo] {
+  override def serialize(
+      value: OpExecInitInfo,
+      gen: JsonGenerator,
+      serializers: SerializerProvider
+  ): Unit = {
+    gen.writeStartObject()
+    // Every branch writes the `kind` discriminator first, then the variant's own fields.
+    value match {
+      case OpExecWithClassName(className, descString) =>
+        gen.writeStringField(OpExecInitInfoSerde.KindField, OpExecInitInfoSerde.ClassNameKind)
+        gen.writeStringField(OpExecInitInfoSerde.ClassNameField, className)
+        gen.writeStringField(OpExecInitInfoSerde.DescStringField, descString)
+      case OpExecWithCode(code, language) =>
+        gen.writeStringField(OpExecInitInfoSerde.KindField, OpExecInitInfoSerde.CodeKind)
+        gen.writeStringField(OpExecInitInfoSerde.CodeField, code)
+        gen.writeStringField(OpExecInitInfoSerde.LanguageField, language)
+      case OpExecSource(storageKey, workflowIdentity) =>
+        gen.writeStringField(OpExecInitInfoSerde.KindField, OpExecInitInfoSerde.SourceKind)
+        gen.writeStringField(OpExecInitInfoSerde.StorageKeyField, storageKey)
+        // Nested message: written through the generator's codec rather than by hand.
+        gen.writeObjectField(OpExecInitInfoSerde.WorkflowIdentityField, workflowIdentity)
+      case OpExecInitInfo.Empty =>
+        gen.writeStringField(OpExecInitInfoSerde.KindField, OpExecInitInfoSerde.EmptyKind)
+    }
+    gen.writeEndObject()
+  }
+}
+
+/**
+  * Reconstructs an [[OpExecInitInfo]] from the tagged JSON object produced by
+  * [[OpExecInitInfoSerializer]]. The resulting value is the same concrete subtype the
+  * engine pattern-matches on in `SerializationManager` / `ExecFactory`, so the
+  * round-tripped operator can still build a runnable executor.
+  */
+class OpExecInitInfoDeserializer extends JsonDeserializer[OpExecInitInfo] {
+  override def deserialize(
+      p: JsonParser,
+      ctxt: DeserializationContext
+  ): OpExecInitInfo = {
+    val codec = p.getCodec
+    val tree: JsonNode = codec.readTree(p)
+
+    // Looks a field up, treating both an absent field and an explicit JSON null as missing.
+    def present(field: String): Option[JsonNode] =
+      Option(tree.get(field)).filterNot(_.isNull)
+
+    // Scalar payload fields fall back to the empty string when missing.
+    def text(field: String): String =
+      present(field).map(_.asText()).getOrElse("")
+
+    // Only an absent discriminator is rejected here; any other value goes through the
+    // match below and ends up in the "Unknown ... kind" branch if unrecognized.
+    val kind = Option(tree.get(OpExecInitInfoSerde.KindField))
+      .getOrElse(
+        throw new IllegalArgumentException(
+          s"Cannot deserialize OpExecInitInfo: missing '${OpExecInitInfoSerde.KindField}' field in $tree"
+        )
+      )
+      .asText()
+
+    kind match {
+      case OpExecInitInfoSerde.ClassNameKind =>
+        OpExecWithClassName(
+          text(OpExecInitInfoSerde.ClassNameField),
+          text(OpExecInitInfoSerde.DescStringField)
+        )
+      case OpExecInitInfoSerde.CodeKind =>
+        OpExecWithCode(
+          text(OpExecInitInfoSerde.CodeField),
+          text(OpExecInitInfoSerde.LanguageField)
+        )
+      case OpExecInitInfoSerde.SourceKind =>
+        // The nested identity is decoded by the surrounding codec; a missing one falls
+        // back to the default instance.
+        val workflowIdentity = present(OpExecInitInfoSerde.WorkflowIdentityField)
+          .map(identityNode => codec.treeToValue(identityNode, classOf[WorkflowIdentity]))
+          .getOrElse(WorkflowIdentity.defaultInstance)
+        OpExecSource(text(OpExecInitInfoSerde.StorageKeyField), workflowIdentity)
+      case OpExecInitInfoSerde.EmptyKind =>
+        OpExecInitInfo.Empty
+      case other =>
+        throw new IllegalArgumentException(s"Unknown OpExecInitInfo kind: $other")
+    }
+  }
+}
diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/OutputModeSerde.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/OutputModeSerde.scala
new file mode 100644
index 00000000000..52289899160
--- /dev/null
+++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/OutputModeSerde.scala
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.util.serde
+
+import com.fasterxml.jackson.core.{JsonGenerator, JsonParser}
+import com.fasterxml.jackson.databind.{
+  DeserializationContext,
+  JsonDeserializer,
+  JsonNode,
+  JsonSerializer,
+  SerializerProvider
+}
+import org.apache.texera.amber.core.workflow.OutputPort.OutputMode
+
+/**
+  * Serializes the scalapb enum `OutputPort.OutputMode` as its integer wire value.
+  *
+  * `OutputMode` is a scalapb `GeneratedEnum` (a sealed abstract class whose values are
+  * `case object`s), so Jackson's default bean serialization emits an object that cannot be
+  * reconstructed (the abstract class has no usable constructor). Emitting the canonical
+  * protobuf integer value keeps the representation compact and unambiguous, and matches
+  * how scalapb itself identifies enum values.
+  */
+class OutputModeSerializer extends JsonSerializer[OutputMode] {
+  override def serialize(
+      value: OutputMode,
+      gen: JsonGenerator,
+      serializers: SerializerProvider
+  ): Unit = {
+    // The whole enum is represented by a bare JSON number, not an object.
+    gen.writeNumber(value.value)
+  }
+}
+
+/**
+  * Reconstructs an `OutputPort.OutputMode` from its integer wire value via
+  * `OutputMode.fromValue`. For robustness it also accepts the legacy object form
+  * `{"value": <int>, ...}` produced by Jackson's default bean serializer.
+  */
+class OutputModeDeserializer extends JsonDeserializer[OutputMode] {
+  override def deserialize(
+      p: JsonParser,
+      ctxt: DeserializationContext
+  ): OutputMode = {
+    val tree: JsonNode = p.getCodec.readTree(p)
+    // Two accepted shapes: a bare number, or an object carrying a "value" field.
+    val wireValue = tree match {
+      case number if number.isNumber =>
+        number.asInt()
+      case legacy if legacy.isObject && legacy.has("value") =>
+        legacy.get("value").asInt()
+      case unsupported =>
+        throw new IllegalArgumentException(
+          s"Cannot deserialize OutputPort.OutputMode from $unsupported"
+        )
+    }
+    OutputMode.fromValue(wireValue)
+  }
+}
diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/PhysicalOpDeserializer.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/PhysicalOpDeserializer.scala
new file mode 100644
index 00000000000..8883a378eca
--- /dev/null
+++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/PhysicalOpDeserializer.scala
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.texera.amber.util.serde + +import com.fasterxml.jackson.core.JsonParser +import com.fasterxml.jackson.databind.{ + DeserializationContext, + JsonDeserializer, + JsonNode, + ObjectMapper +} +import org.apache.texera.amber.core.executor.OpExecInitInfo +import org.apache.texera.amber.core.tuple.Schema +import org.apache.texera.amber.core.virtualidentity.{ + ExecutionIdentity, + PhysicalOpIdentity, + WorkflowIdentity +} +import org.apache.texera.amber.core.workflow.{ + DerivePartitionSpec, + InputPort, + LocationPreference, + OutputPort, + PartitionInfo, + Passthrough, + PhysicalOp, + PortIdentity +} + +/** + * Custom Jackson deserializer for [[PhysicalOp]]. + * + * jackson-module-scala binds Scala case classes to their primary constructor, whose + * `inputPorts` / `outputPorts` maps (`Either[Throwable, Schema]` + lazy vals) and + * `propagateSchema` / `derivePartition` function values cannot be parsed from JSON. + * Serialization instead emits the slimmed-down `inputPortsSerialized` / + * `outputPortsSerialized` views and the `partitionDeriveSpec`; this deserializer reads + * those back and delegates to [[PhysicalOp.fromSerialized]] to rebuild the real maps + * (with empty link lists, to be rehydrated at the `PhysicalPlan` level) and the lazy + * `derivePartition` function. + * + * Each nested field is decoded through the surrounding mapper, so all other registered + * (de)serializers (e.g. for `OpExecInitInfo`, `LocationPreference`, `PartitionInfo`, + * `DerivePartitionSpec`, and the `PortIdentity` map keys) are reused. 
+  */
+class PhysicalOpDeserializer extends JsonDeserializer[PhysicalOp] {
+
+  /**
+    * Reads one JSON object and rebuilds the operator through `PhysicalOp.fromSerialized`.
+    *
+    * `id`, `workflowId`, `executionId` and `opExecInitInfo` are required; every other field
+    * falls back to a default when absent or null (`parallelizable` = true,
+    * `isOneToManyOp` = false, `pveName` = "", `partitionDeriveSpec` = `Passthrough()`,
+    * empty `partitionRequirement` and port maps, no location preference or worker hint).
+    *
+    * @throws IllegalArgumentException if a required field is missing, or if a serialized
+    *                                  port entry or port key is malformed
+    */
+  override def deserialize(p: JsonParser, ctxt: DeserializationContext): PhysicalOp = {
+    // NOTE(review): assumes the parser's codec is an ObjectMapper; a parser created by a
+    // different codec implementation would fail this cast - confirm against callers.
+    val mapper = p.getCodec.asInstanceOf[ObjectMapper]
+    val node: JsonNode = mapper.readTree(p)
+
+    def required[T](field: String, clazz: Class[T]): T = {
+      val child = node.get(field)
+      if (child == null || child.isNull) {
+        throw new IllegalArgumentException(
+          s"Cannot deserialize PhysicalOp: missing required field '$field' in $node"
+        )
+      }
+      mapper.treeToValue(child, clazz)
+    }
+
+    // Absent fields and explicit JSON nulls are both treated as "not provided".
+    def optionalNode(field: String): Option[JsonNode] = {
+      val child = node.get(field)
+      if (child == null || child.isNull) None else Some(child)
+    }
+
+    val id = required("id", classOf[PhysicalOpIdentity])
+    val workflowId = required("workflowId", classOf[WorkflowIdentity])
+    val executionId = required("executionId", classOf[ExecutionIdentity])
+    val opExecInitInfo = required("opExecInitInfo", classOf[OpExecInitInfo])
+
+    val parallelizable =
+      optionalNode("parallelizable").forall(_.asBoolean(true))
+    val isOneToManyOp =
+      optionalNode("isOneToManyOp").exists(_.asBoolean(false))
+    val pveName =
+      optionalNode("pveName").map(_.asText("")).getOrElse("")
+
+    val locationPreference: Option[LocationPreference] =
+      optionalNode("locationPreference")
+        .map(n => mapper.treeToValue(n, classOf[LocationPreference]))
+
+    val suggestedWorkerNum: Option[Int] =
+      optionalNode("suggestedWorkerNum").map(_.asInt())
+
+    val partitionDeriveSpec: DerivePartitionSpec =
+      optionalNode("partitionDeriveSpec")
+        .map(n => mapper.treeToValue(n, classOf[DerivePartitionSpec]))
+        .getOrElse(Passthrough())
+
+    // List[Option[PartitionInfo]] — decode element-wise to preserve nulls as None.
+    val partitionRequirement: List[Option[PartitionInfo]] =
+      optionalNode("partitionRequirement") match {
+        case Some(arr) if arr.isArray =>
+          val builder = List.newBuilder[Option[PartitionInfo]]
+          arr.forEach { elem =>
+            if (elem == null || elem.isNull) builder += None
+            else builder += Some(mapper.treeToValue(elem, classOf[PartitionInfo]))
+          }
+          builder.result()
+        case _ => List.empty
+      }
+
+    val inputPortsSerialized =
+      optionalNode("inputPortsSerialized")
+        .map(decodeInputPorts(mapper, _))
+        .getOrElse(Map.empty[PortIdentity, (InputPort, Option[Schema])])
+    val outputPortsSerialized =
+      optionalNode("outputPortsSerialized")
+        .map(decodeOutputPorts(mapper, _))
+        .getOrElse(Map.empty[PortIdentity, (OutputPort, Option[Schema])])
+
+    PhysicalOp.fromSerialized(
+      id = id,
+      workflowId = workflowId,
+      executionId = executionId,
+      opExecInitInfo = opExecInitInfo,
+      parallelizable = parallelizable,
+      locationPreference = locationPreference,
+      partitionRequirement = partitionRequirement,
+      partitionDeriveSpec = partitionDeriveSpec,
+      inputPortsSerialized = inputPortsSerialized,
+      outputPortsSerialized = outputPortsSerialized,
+      isOneToManyOp = isOneToManyOp,
+      suggestedWorkerNum = suggestedWorkerNum,
+      pveName = pveName
+    )
+  }
+
+  /**
+    * Decodes the `{ "<id>_<internal>": [port, schemaOrNull], ... }` object emitted for the
+    * serialized input-port view. The keys are parsed by `parsePortKey` below, which reads
+    * the `"<id>_<internal>"` form written by `PortIdentityKeySerializer`.
+    */
+  private def decodeInputPorts(
+      mapper: ObjectMapper,
+      node: JsonNode
+  ): Map[PortIdentity, (InputPort, Option[Schema])] =
+    decodePortMap(mapper, node, classOf[InputPort])
+
+  /** Output-port counterpart of `decodeInputPorts`; same JSON shape. */
+  private def decodeOutputPorts(
+      mapper: ObjectMapper,
+      node: JsonNode
+  ): Map[PortIdentity, (OutputPort, Option[Schema])] =
+    decodePortMap(mapper, node, classOf[OutputPort])
+
+  /**
+    * Shared decoder for both port maps.
+    *
+    * A node that is not a JSON object decodes to an empty map. Each entry's value must carry
+    * a port at index 0; the schema at index 1 is optional (absent or null becomes `None`).
+    *
+    * @throws IllegalArgumentException if an entry has no port at index 0, or if its key is
+    *                                  not of the form `"<id>_<internal>"`
+    */
+  private def decodePortMap[P](
+      mapper: ObjectMapper,
+      node: JsonNode,
+      portClass: Class[P]
+  ): Map[PortIdentity, (P, Option[Schema])] = {
+    if (!node.isObject) {
+      Map.empty[PortIdentity, (P, Option[Schema])]
+    } else {
+      val builder = Map.newBuilder[PortIdentity, (P, Option[Schema])]
+      val fields = node.fields()
+      while (fields.hasNext) {
+        val entry = fields.next()
+        val portId = parsePortKey(entry.getKey)
+        val tupleNode = entry.getValue
+        // The Scala module serializes a Tuple2 as a 2-element JSON array: [port, schema?].
+        val portNode = tupleNode.get(0)
+        // `get(0)` is null when the value is not an array or is empty. Passing that on
+        // would store a null port in the map, so fail here with the offending entry.
+        if (portNode == null || portNode.isNull) {
+          throw new IllegalArgumentException(
+            s"Cannot deserialize PhysicalOp: port entry '${entry.getKey}' has no port in $tupleNode"
+          )
+        }
+        val schemaNode = tupleNode.get(1)
+        val port = mapper.treeToValue(portNode, portClass)
+        val schemaOpt: Option[Schema] =
+          if (schemaNode == null || schemaNode.isNull) None
+          else Some(mapper.treeToValue(schemaNode, classOf[Schema]))
+        builder += portId -> ((port, schemaOpt))
+      }
+      builder.result()
+    }
+  }
+
+  /**
+    * Parses the `"<id>_<internal>"` port key produced by `PortIdentityKeySerializer`
+    * (for example `"0_false"`). Segments after the second are ignored.
+    *
+    * @throws IllegalArgumentException if the key has fewer than two segments, the id is not
+    *                                  an integer, or the internal flag is not a boolean
+    */
+  private def parsePortKey(key: String): PortIdentity = {
+    val parsed = key.split("_") match {
+      case Array(idText, internalText, _*) =>
+        for {
+          id <- idText.toIntOption
+          internal <- internalText.toBooleanOption
+        } yield PortIdentity(id, internal)
+      case _ => None
+    }
+    parsed.getOrElse(
+      throw new IllegalArgumentException(
+        s"Cannot deserialize PhysicalOp: malformed port key '$key', expected '<id>_<internal>'"
+      )
+    )
+  }
+}
diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/PhysicalOpSerializer.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/PhysicalOpSerializer.scala
new file mode 100644
index 00000000000..e7495fbdb7a
--- /dev/null
+++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/PhysicalOpSerializer.scala
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.util.serde + +import com.fasterxml.jackson.core.JsonGenerator +import com.fasterxml.jackson.databind.{JsonSerializer, SerializerProvider} +import org.apache.texera.amber.core.workflow.{PartitionInfo, PhysicalOp, PortIdentity} + +/** + * Custom Jackson serializer for [[PhysicalOp]], paired with [[PhysicalOpDeserializer]]. + * + * It is needed for two reasons: + * + * 1. The runtime `inputPorts` / `outputPorts` maps hold per-port link lists plus an + * `Either[Throwable, Schema]`, which are not serializable. They are emitted here as + * the slimmed-down `inputPortsSerialized` / `outputPortsSerialized` views (links + * dropped, `Either` collapsed to `Option[Schema]`); the link lists are rebuilt at the + * `PhysicalPlan` level by replaying `links`. + * + * 2. `partitionRequirement` is a `List[Option[PartitionInfo]]`. `PartitionInfo` is + * polymorphic (`@JsonTypeInfo`), but wrapping it in `Option` / `List` erases its + * static type and Jackson then drops the `type` discriminator. Here each element is + * written through the polymorphic base-type serializer so the discriminator survives + * and the value can be read back. 
+  *
+  * All other fields are written by delegating to the surrounding provider, so the
+  * registered (de)serializers for `OpExecInitInfo`, `LocationPreference`,
+  * `DerivePartitionSpec`, `OutputPort.OutputMode`, and the `PortIdentity` map keys are all
+  * reused. Functions (`derivePartition`, `propagateSchema`) are intentionally not written.
+  */
+class PhysicalOpSerializer extends JsonSerializer[PhysicalOp] {
+
+  /**
+    * Writes `op` as a single JSON object. `locationPreference` and `suggestedWorkerNum`
+    * are omitted entirely when they are `None`; every other field is always written.
+    */
+  override def serialize(
+      op: PhysicalOp,
+      gen: JsonGenerator,
+      provider: SerializerProvider
+  ): Unit = {
+    gen.writeStartObject()
+
+    gen.writeObjectField("id", op.id)
+    gen.writeObjectField("workflowId", op.workflowId)
+    gen.writeObjectField("executionId", op.executionId)
+    gen.writeObjectField("opExecInitInfo", op.opExecInitInfo)
+    gen.writeBooleanField("parallelizable", op.parallelizable)
+
+    op.locationPreference.foreach { pref =>
+      gen.writeObjectField("locationPreference", pref)
+    }
+
+    // partitionRequirement: write each element so the @JsonTypeInfo discriminator of the
+    // polymorphic PartitionInfo is emitted even though the Option/List wrapper has erased
+    // the static element type. We combine the concrete value serializer (which writes the
+    // subtype's fields) with the base-type TypeSerializer (which writes the `type` id).
+    gen.writeArrayFieldStart("partitionRequirement")
+    val partitionInfoType = provider.constructType(classOf[PartitionInfo])
+    // NOTE(review): presumably non-null because PartitionInfo carries @JsonTypeInfo;
+    // confirm, since the result is passed straight to `serializeWithType` below.
+    val partitionTypeSerializer = provider.findTypeSerializer(partitionInfoType)
+    op.partitionRequirement.foreach {
+      case Some(partitionInfo) =>
+        val concreteSerializer = provider.findValueSerializer(partitionInfo.getClass)
+        concreteSerializer.serializeWithType(partitionInfo, gen, provider, partitionTypeSerializer)
+      // A `None` keeps its position in the array as an explicit JSON null.
+      case None => gen.writeNull()
+    }
+    gen.writeEndArray()
+
+    gen.writeObjectField("partitionDeriveSpec", op.partitionDeriveSpec)
+
+    writePortMap(gen, provider, "inputPortsSerialized", op.inputPortsSerialized)
+    writePortMap(gen, provider, "outputPortsSerialized", op.outputPortsSerialized)
+
+    gen.writeBooleanField("isOneToManyOp", op.isOneToManyOp)
+    op.suggestedWorkerNum.foreach(n => gen.writeNumberField("suggestedWorkerNum", n))
+    gen.writeStringField("pveName", op.pveName)
+
+    gen.writeEndObject()
+  }
+
+  /**
+    * Writes a `Map[PortIdentity, (Port, Option[Schema])]` as a JSON object keyed by the
+    * `PortIdentityKeySerializer` string key, with each value a `[port, schemaOrNull]`
+    * array. This mirrors what `PhysicalOpDeserializer` reads back.
+    *
+    * The `provider` parameter is not used in the body: ports and schemas are written with
+    * `gen.writeObject`.
+    */
+  private def writePortMap[P](
+      gen: JsonGenerator,
+      provider: SerializerProvider,
+      fieldName: String,
+      portMap: Map[PortIdentity, (P, Option[org.apache.texera.amber.core.tuple.Schema])]
+  ): Unit = {
+    gen.writeObjectFieldStart(fieldName)
+    portMap.foreach {
+      case (portId, (port, schemaOpt)) =>
+        gen.writeFieldName(PortIdentityKeySerializer.portIdToString(portId))
+        gen.writeStartArray()
+        gen.writeObject(port)
+        // The schema slot is always written, so the array always has two elements.
+        schemaOpt match {
+          case Some(schema) => gen.writeObject(schema)
+          case None => gen.writeNull()
+        }
+        gen.writeEndArray()
+    }
+    gen.writeEndObject()
+  }
+}
diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/PhysicalPlanDeserializer.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/PhysicalPlanDeserializer.scala
new file mode 100644
index 00000000000..c6282f417d9
--- /dev/null
+++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/serde/PhysicalPlanDeserializer.scala
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.texera.amber.util.serde + +import com.fasterxml.jackson.core.JsonParser +import com.fasterxml.jackson.databind.`type`.TypeFactory +import com.fasterxml.jackson.databind.{ + DeserializationContext, + JsonDeserializer, + JsonNode, + ObjectMapper +} +import org.apache.texera.amber.core.workflow.{PhysicalLink, PhysicalOp, PhysicalPlan} + +import scala.jdk.CollectionConverters._ + +/** + * Custom Jackson deserializer for [[PhysicalPlan]]. + * + * The plan's operators are deserialized by [[PhysicalOpDeserializer]] with EMPTY per-port + * link lists (links are dropped from the per-port serialized views). This deserializer + * rebuilds the full plan and then rehydrates each operator's input/output link lists by + * replaying the plan-level `links`, so the round-tripped plan is structurally identical to + * the original: every op carries the correct incoming/outgoing links on the correct ports. + * + * `addInputLink` / `addOutputLink` are used (not `addLink`) so that schema propagation is + * NOT re-run: per-port schemas were already restored during operator deserialization. 
+ */ +class PhysicalPlanDeserializer extends JsonDeserializer[PhysicalPlan] { + + override def deserialize(p: JsonParser, ctxt: DeserializationContext): PhysicalPlan = { + val mapper = p.getCodec.asInstanceOf[ObjectMapper] + val node: JsonNode = mapper.readTree(p) + val tf: TypeFactory = mapper.getTypeFactory + + val operatorsNode = node.get("operators") + val linksNode = node.get("links") + + val operators: Set[PhysicalOp] = + if (operatorsNode == null || operatorsNode.isNull) Set.empty + else { + val setType = tf.constructCollectionType(classOf[java.util.LinkedHashSet[_]], classOf[PhysicalOp]) + val javaSet: java.util.Set[PhysicalOp] = mapper.convertValue(operatorsNode, setType) + javaSet.asScala.toSet + } + + val links: Set[PhysicalLink] = + if (linksNode == null || linksNode.isNull) Set.empty + else { + val setType = + tf.constructCollectionType(classOf[java.util.LinkedHashSet[_]], classOf[PhysicalLink]) + val javaSet: java.util.Set[PhysicalLink] = mapper.convertValue(linksNode, setType) + javaSet.asScala.toSet + } + + rebuildLinks(PhysicalPlan(operators, links)) + } + + /** + * Replays `plan.links` onto each operator's per-port link lists. Operators arrive with + * empty link lists; we append each link to its source op's output port and its + * destination op's input port. 
+ */ + private def rebuildLinks(plan: PhysicalPlan): PhysicalPlan = { + val opMap = scala.collection.mutable.Map[ + org.apache.texera.amber.core.virtualidentity.PhysicalOpIdentity, + PhysicalOp + ]() + plan.operators.foreach(op => opMap(op.id) = op) + + plan.links.foreach { link => + opMap.get(link.fromOpId).foreach(op => opMap(link.fromOpId) = op.addOutputLink(link)) + opMap.get(link.toOpId).foreach(op => opMap(link.toOpId) = op.addInputLink(link)) + } + + plan.copy(operators = opMap.values.toSet, links = plan.links) + } +} diff --git a/common/workflow-core/src/test/scala/org/apache/texera/amber/core/workflow/PhysicalPlanSerializationSpec.scala b/common/workflow-core/src/test/scala/org/apache/texera/amber/core/workflow/PhysicalPlanSerializationSpec.scala new file mode 100644 index 00000000000..0a1061a8443 --- /dev/null +++ b/common/workflow-core/src/test/scala/org/apache/texera/amber/core/workflow/PhysicalPlanSerializationSpec.scala @@ -0,0 +1,344 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.amber.core.workflow + +import org.apache.texera.amber.core.executor.{ + OpExecInitInfo, + OpExecSource, + OpExecWithClassName, + OpExecWithCode +} +import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema} +import org.apache.texera.amber.core.virtualidentity.{ + ExecutionIdentity, + OperatorIdentity, + PhysicalOpIdentity, + WorkflowIdentity +} +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +/** + * Proves that a compiled [[PhysicalPlan]] can be serialized to JSON and deserialized + * back into an equivalent, runnable plan. The plan is assembled the way operator + * descriptors assemble it: via the [[PhysicalOp]] factory methods + `withInputPorts` / + * `withOutputPorts` / `withDerivePartition` / `PhysicalPlan.addLink`, exercising every + * field that used to be `@JsonIgnore`d because it held a function closure. + */ +class PhysicalPlanSerializationSpec extends AnyFlatSpec with Matchers { + + private val workflowId = WorkflowIdentity(42L) + private val executionId = ExecutionIdentity(7L) + + private def opId(name: String): PhysicalOpIdentity = + PhysicalOpIdentity(OperatorIdentity(name), "main") + + private val scanSchema: Schema = + Schema(List(new Attribute("id", AttributeType.INTEGER), new Attribute("name", AttributeType.STRING))) + private val projectedSchema: Schema = + Schema(List(new Attribute("name", AttributeType.STRING))) + private val aggSchema: Schema = + Schema(List(new Attribute("name", AttributeType.STRING), new Attribute("cnt", AttributeType.LONG))) + + /** + * Builds a realistic 4-operator plan: scan (source, className, PreferController) -> + * projection (one-to-one, ProjectionPartition) -> aggregate (hash, ToHash) -> + * sink (many-to-one, code, ToSingle), with output schemas attached on every port so + * the per-port schema round-trip is exercised. 
+ */ + private def buildPlan(): PhysicalPlan = { + val scanOp = PhysicalOp + .sourcePhysicalOp( + opId("scan"), + workflowId, + executionId, + OpExecWithClassName("org.apache.texera.amber.operator.source.scan.csv.CSVScanSourceOpExec") + ) + .withOutputPorts(List(OutputPort(PortIdentity(0)))) + // sources have no input to propagate from, so set the output schema directly + val scanWithSchema = setOutputSchema(scanOp, PortIdentity(0), scanSchema) + + val projectionOp = PhysicalOp + .oneToOnePhysicalOp( + opId("projection"), + workflowId, + executionId, + OpExecWithClassName( + "org.apache.texera.amber.operator.projection.ProjectionOpExec", + """{"attributes":[],"isDrop":false}""" + ) + ) + .withInputPorts(List(InputPort(PortIdentity(0)))) + .withOutputPorts(List(OutputPort(PortIdentity(0)))) + .withDerivePartition(ProjectionPartition()) + .withPropagateSchema(SchemaPropagationFunc(_ => Map(PortIdentity(0) -> projectedSchema))) + + val aggregateOp = PhysicalOp + .oneToOnePhysicalOp( + opId("aggregate"), + workflowId, + executionId, + OpExecWithClassName("org.apache.texera.amber.operator.aggregate.AggregateOpExec", "{}") + ) + .withInputPorts(List(InputPort(PortIdentity(0)))) + .withOutputPorts(List(OutputPort(PortIdentity(0)))) + .withPartitionRequirement(List(Option(HashPartition(List("name"))))) + .withDerivePartition(ToHash(List("name"))) + .withParallelizable(false) + .withPropagateSchema(SchemaPropagationFunc(_ => Map(PortIdentity(0) -> aggSchema))) + + val sinkOp = PhysicalOp + .manyToOnePhysicalOp( + opId("sink"), + workflowId, + executionId, + OpExecWithCode("def process(): pass", "python") + ) + .withInputPorts(List(InputPort(PortIdentity(0)))) + .withOutputPorts(List(OutputPort(PortIdentity(0)))) + .withPropagateSchema(SchemaPropagationFunc(_ => Map(PortIdentity(0) -> aggSchema))) + + var plan = PhysicalPlan( + operators = Set(scanWithSchema, projectionOp, aggregateOp, sinkOp), + links = Set.empty + ) + plan = plan.addLink(PhysicalLink(opId("scan"), 
PortIdentity(0), opId("projection"), PortIdentity(0))) + plan = plan.addLink( + PhysicalLink(opId("projection"), PortIdentity(0), opId("aggregate"), PortIdentity(0)) + ) + plan = plan.addLink(PhysicalLink(opId("aggregate"), PortIdentity(0), opId("sink"), PortIdentity(0))) + plan + } + + /** Helper: force a Right schema onto a specific output port. */ + private def setOutputSchema(op: PhysicalOp, portId: PortIdentity, schema: Schema): PhysicalOp = { + val (port, links, _) = op.outputPorts(portId) + op.copy(outputPorts = op.outputPorts + (portId -> ((port, links, Right(schema))))) + } + + private def roundTrip(plan: PhysicalPlan): PhysicalPlan = { + val json = objectMapper.writeValueAsString(plan) + objectMapper.readValue(json, classOf[PhysicalPlan]) + } + + // --------------------------------------------------------------------------- + // Structural equivalence + // --------------------------------------------------------------------------- + + "PhysicalPlan JSON round-trip" should "preserve the operator id set" in { + val plan = buildPlan() + val back = roundTrip(plan) + back.operators.map(_.id) shouldBe plan.operators.map(_.id) + } + + it should "preserve the link set" in { + val plan = buildPlan() + val back = roundTrip(plan) + back.links shouldBe plan.links + } + + it should "rehydrate per-port input/output link lists from the plan's links" in { + val plan = buildPlan() + val back = roundTrip(plan) + // every op's input/output link lists must match the original op's lists + plan.operators.foreach { orig => + val restored = back.getOperator(orig.id) + restored.getInputLinks() should contain theSameElementsAs orig.getInputLinks() + orig.outputPorts.keys.foreach { portId => + restored.getOutputLinks(portId) should contain theSameElementsAs orig.getOutputLinks(portId) + } + } + // sanity: the projection op really does have 1 incoming + 1 outgoing link + val proj = back.getOperator(opId("projection")) + proj.getInputLinks() should have size 1 + 
proj.getOutputLinks(PortIdentity(0)) should have size 1 + } + + // --------------------------------------------------------------------------- + // Runtime-critical, per-operator equivalence + // --------------------------------------------------------------------------- + + it should "preserve opExecInitInfo (className/descString) on the scan operator" in { + val plan = buildPlan() + val back = roundTrip(plan) + back.getOperator(opId("scan")).opExecInitInfo shouldBe a[OpExecWithClassName] + back.getOperator(opId("scan")).opExecInitInfo shouldBe + plan.getOperator(opId("scan")).opExecInitInfo + } + + it should "preserve opExecInitInfo (code/language) on the sink operator" in { + val plan = buildPlan() + val back = roundTrip(plan) + val info = back.getOperator(opId("sink")).opExecInitInfo + info shouldBe a[OpExecWithCode] + info shouldBe plan.getOperator(opId("sink")).opExecInitInfo + info.asInstanceOf[OpExecWithCode].language shouldBe "python" + } + + it should "round-trip an OpExecSource (with workflowIdentity) standalone" in { + val source: OpExecInitInfo = OpExecSource("storage-key-1", workflowId) + val json = objectMapper.writeValueAsString(source) + val back = objectMapper.readValue(json, classOf[OpExecInitInfo]) + back shouldBe source + back.asInstanceOf[OpExecSource].workflowIdentity shouldBe workflowId + } + + it should "preserve partitionRequirement on every operator" in { + val plan = buildPlan() + val back = roundTrip(plan) + plan.operators.foreach { orig => + back.getOperator(orig.id).partitionRequirement shouldBe orig.partitionRequirement + } + // the aggregate's hash requirement survives with its attribute names + back.getOperator(opId("aggregate")).partitionRequirement shouldBe + List(Option(HashPartition(List("name")))) + } + + it should "preserve partitionDeriveSpec and reproduce the same derivePartition output" in { + val plan = buildPlan() + val back = roundTrip(plan) + + val sampleInputs = List(HashPartition(List("name")), UnknownPartition()) + 
+ plan.operators.foreach { orig => + val restored = back.getOperator(orig.id) + restored.partitionDeriveSpec shouldBe orig.partitionDeriveSpec + restored.derivePartition(sampleInputs) shouldBe orig.derivePartition(sampleInputs) + } + + // spot-check the concrete spec types and outputs + back.getOperator(opId("scan")).partitionDeriveSpec shouldBe Passthrough() + back.getOperator(opId("projection")).partitionDeriveSpec shouldBe ProjectionPartition() + back.getOperator(opId("aggregate")).partitionDeriveSpec shouldBe ToHash(List("name")) + back.getOperator(opId("sink")).partitionDeriveSpec shouldBe ToSingle() + + back.getOperator(opId("aggregate")).derivePartition(sampleInputs) shouldBe + HashPartition(List("name")) + back.getOperator(opId("sink")).derivePartition(sampleInputs) shouldBe SinglePartition() + // projection passes a hash partition (with attrs) through unchanged + back.getOperator(opId("projection")).derivePartition(sampleInputs) shouldBe + HashPartition(List("name")) + // projection collapses an attribute-less hash partition to unknown + back + .getOperator(opId("projection")) + .derivePartition(List(HashPartition(List.empty))) shouldBe UnknownPartition() + } + + it should "preserve locationPreference (PreferController) and its singleton identity" in { + val plan = buildPlan() + val back = roundTrip(plan) + val pref = back.getOperator(opId("scan")).locationPreference + pref shouldBe Some(PreferController) + // singleton identity is preserved by the custom deserializer + assert(pref.get eq PreferController) + } + + it should "preserve absent locationPreference on non-source operators" in { + val plan = buildPlan() + val back = roundTrip(plan) + back.getOperator(opId("aggregate")).locationPreference shouldBe None + } + + it should "preserve per-port output schemas on every operator" in { + val plan = buildPlan() + val back = roundTrip(plan) + plan.operators.foreach { orig => + orig.outputPorts.foreach { + case (portId, (_, _, schemaEither)) => + val 
restoredSchema = back.getOperator(orig.id).outputPorts(portId)._3 + restoredSchema.toOption shouldBe schemaEither.toOption + } + } + // concrete schema check + back.getOperator(opId("scan")).outputPorts(PortIdentity(0))._3.toOption shouldBe Some(scanSchema) + back.getOperator(opId("aggregate")).outputPorts(PortIdentity(0))._3.toOption shouldBe + Some(aggSchema) + } + + it should "preserve input-port schemas propagated across links" in { + val plan = buildPlan() + val back = roundTrip(plan) + // the projection received the scan's output schema on its input port + plan.getOperator(opId("projection")).inputPorts(PortIdentity(0))._3.toOption shouldBe + Some(scanSchema) + back.getOperator(opId("projection")).inputPorts(PortIdentity(0))._3.toOption shouldBe + Some(scanSchema) + } + + it should "preserve parallelizable, isOneToManyOp, suggestedWorkerNum, and pveName" in { + val plan = buildPlan().setOperator( + buildPlan() + .getOperator(opId("projection")) + .withSuggestedWorkerNum(5) + .withPveName("my-pve") + .withIsOneToManyOp(true) + ) + val back = roundTrip(plan) + val proj = back.getOperator(opId("projection")) + proj.suggestedWorkerNum shouldBe Some(5) + proj.pveName shouldBe "my-pve" + proj.isOneToManyOp shouldBe true + back.getOperator(opId("aggregate")).parallelizable shouldBe false + back.getOperator(opId("scan")).parallelizable shouldBe false + } + + it should "preserve port metadata (displayName, dependencies, blocking, mode)" in { + val inPort = InputPort(PortIdentity(1), displayName = "in1", dependencies = List(PortIdentity(0))) + val outPort = OutputPort(PortIdentity(0), displayName = "out0", blocking = true) + val op = PhysicalOp + .oneToOnePhysicalOp(opId("x"), workflowId, executionId, OpExecWithClassName("X")) + .withInputPorts(List(InputPort(PortIdentity(0)), inPort)) + .withOutputPorts(List(outPort)) + val plan = PhysicalPlan(Set(op), Set.empty) + val back = roundTrip(plan) + val restored = back.getOperator(opId("x")) + 
restored.inputPorts(PortIdentity(1))._1 shouldBe inPort + restored.outputPorts(PortIdentity(0))._1 shouldBe outPort + } + + // --------------------------------------------------------------------------- + // Negative test + // --------------------------------------------------------------------------- + + "PhysicalPlan deserialization" should "fail on malformed JSON" in { + assertThrows[Exception] { + objectMapper.readValue("{ this is not valid json ", classOf[PhysicalPlan]) + } + } + + it should "fail when a required structural field is missing/garbled" in { + // operators present but with a bogus shape for an operator entry + val badJson = """{"operators":[{"id":"not-an-object"}],"links":[]}""" + assertThrows[Exception] { + objectMapper.readValue(badJson, classOf[PhysicalPlan]) + } + } + + "OpExecInitInfo deserialization" should "fail on an unknown kind" in { + val badJson = """{"kind":"bogus"}""" + val ex = intercept[Exception] { + objectMapper.readValue(badJson, classOf[OpExecInitInfo]) + } + // unwrap to confirm it is our explicit rejection, not an unrelated failure + val msg = Option(ex.getCause).map(_.getMessage).getOrElse(ex.getMessage) + msg should include("Unknown OpExecInitInfo kind") + } +} diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/aggregate/AggregateOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/aggregate/AggregateOpDesc.scala index 7e76b3ce7e0..8705fc12b56 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/aggregate/AggregateOpDesc.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/aggregate/AggregateOpDesc.scala @@ -109,7 +109,7 @@ class AggregateOpDesc extends LogicalOp { ) ) .withPartitionRequirement(List(Option(HashPartition(groupByKeys)))) - .withDerivePartition(_ => HashPartition(groupByKeys)) + .withDerivePartition(ToHash(groupByKeys)) var plan = PhysicalPlan( operators = 
Set(partialPhysicalOp, finalPhysicalOp), diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/difference/DifferenceOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/difference/DifferenceOpDesc.scala index c12f057a25b..40003cb6550 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/difference/DifferenceOpDesc.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/difference/DifferenceOpDesc.scala @@ -42,7 +42,7 @@ class DifferenceOpDesc extends LogicalOp { .withInputPorts(operatorInfo.inputPorts) .withOutputPorts(operatorInfo.outputPorts) .withPartitionRequirement(List(Option(HashPartition()), Option(HashPartition()))) - .withDerivePartition(_ => HashPartition()) + .withDerivePartition(ToHash()) .withPropagateSchema(SchemaPropagationFunc(inputSchemas => { Preconditions.checkArgument(inputSchemas.values.toSet.size == 1) val outputSchema = inputSchemas.values.head diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/distinct/DistinctOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/distinct/DistinctOpDesc.scala index 9e75e648bb4..a8d7520157f 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/distinct/DistinctOpDesc.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/distinct/DistinctOpDesc.scala @@ -21,7 +21,13 @@ package org.apache.texera.amber.operator.distinct import org.apache.texera.amber.core.executor.OpExecWithClassName import org.apache.texera.amber.core.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import org.apache.texera.amber.core.workflow.{HashPartition, InputPort, OutputPort, PhysicalOp} +import org.apache.texera.amber.core.workflow.{ + HashPartition, + InputPort, + OutputPort, + PhysicalOp, + ToHash +} import org.apache.texera.amber.operator.LogicalOp import 
org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo} @@ -41,7 +47,7 @@ class DistinctOpDesc extends LogicalOp { .withInputPorts(operatorInfo.inputPorts) .withOutputPorts(operatorInfo.outputPorts) .withPartitionRequirement(List(Option(HashPartition()))) - .withDerivePartition(_ => HashPartition()) + .withDerivePartition(ToHash()) } diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/hashJoin/HashJoinOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/hashJoin/HashJoinOpDesc.scala index 708546a84ad..e4c4ec20eda 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/hashJoin/HashJoinOpDesc.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/hashJoin/HashJoinOpDesc.scala @@ -143,7 +143,7 @@ class HashJoinOpDesc[K] extends LogicalOp { Option(HashPartition(List(probeAttributeName))) ) ) - .withDerivePartition(_ => HashPartition(List(probeAttributeName))) + .withDerivePartition(ToHash(List(probeAttributeName))) .withParallelizable(true) .withPropagateSchema( SchemaPropagationFunc(inputSchemas => { diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/intersect/IntersectOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/intersect/IntersectOpDesc.scala index 323d51b0ad5..551203ec68e 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/intersect/IntersectOpDesc.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/intersect/IntersectOpDesc.scala @@ -41,7 +41,7 @@ class IntersectOpDesc extends LogicalOp { .withInputPorts(operatorInfo.inputPorts) .withOutputPorts(operatorInfo.outputPorts) .withPartitionRequirement(List(Option(HashPartition()), Option(HashPartition()))) - .withDerivePartition(_ => HashPartition()) + .withDerivePartition(ToHash()) } override def operatorInfo: OperatorInfo 
= diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/projection/ProjectionOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/projection/ProjectionOpDesc.scala index 3af25b98326..4ca6f8c2940 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/projection/ProjectionOpDesc.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/projection/ProjectionOpDesc.scala @@ -54,7 +54,7 @@ class ProjectionOpDesc extends MapOpDesc { ) .withInputPorts(operatorInfo.inputPorts) .withOutputPorts(operatorInfo.outputPorts) - .withDerivePartition(derivePartition()) + .withDerivePartition(ProjectionPartition()) .withPropagateSchema(SchemaPropagationFunc(inputSchemas => { require(attributes.nonEmpty, "Attributes must not be empty") @@ -74,21 +74,6 @@ class ProjectionOpDesc extends MapOpDesc { })) } - def derivePartition()(partition: List[PartitionInfo]): PartitionInfo = { - val inputPartitionInfo = partition.head - - val outputPartitionInfo = inputPartitionInfo match { - case HashPartition(hashAttributeNames) => - if (hashAttributeNames.nonEmpty) HashPartition(hashAttributeNames) else UnknownPartition() - case RangePartition(rangeAttributeNames, min, max) => - if (rangeAttributeNames.nonEmpty) RangePartition(rangeAttributeNames, min, max) - else UnknownPartition() - case _ => inputPartitionInfo - } - - outputPartitionInfo - } - override def operatorInfo: OperatorInfo = { OperatorInfo( "Projection", diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/symmetricDifference/SymmetricDifferenceOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/symmetricDifference/SymmetricDifferenceOpDesc.scala index d0d6deecbe6..d806b319037 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/symmetricDifference/SymmetricDifferenceOpDesc.scala +++ 
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/symmetricDifference/SymmetricDifferenceOpDesc.scala @@ -45,7 +45,7 @@ class SymmetricDifferenceOpDesc extends LogicalOp { .withInputPorts(operatorInfo.inputPorts) .withOutputPorts(operatorInfo.outputPorts) .withPartitionRequirement(List(Option(HashPartition()), Option(HashPartition()))) - .withDerivePartition(_ => HashPartition(List())) + .withDerivePartition(ToHash(List())) .withPropagateSchema(SchemaPropagationFunc(inputSchemas => { Preconditions.checkArgument(inputSchemas.values.toSet.size == 1) val outputSchema = inputSchemas.values.head diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/java/JavaUDFOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/java/JavaUDFOpDesc.scala index 0ea42ef3fa4..57c0867043e 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/java/JavaUDFOpDesc.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/java/JavaUDFOpDesc.scala @@ -111,7 +111,7 @@ class JavaUDFOpDesc extends LogicalOp { operatorIdentifier, OpExecWithCode(code, "java") ) - .withDerivePartition(_ => UnknownPartition()) + .withDerivePartition(ToUnknown()) .withInputPorts(operatorInfo.inputPorts) .withOutputPorts(operatorInfo.outputPorts) .withPartitionRequirement(partitionRequirement) @@ -127,7 +127,7 @@ class JavaUDFOpDesc extends LogicalOp { operatorIdentifier, OpExecWithCode(code, "java") ) - .withDerivePartition(_ => UnknownPartition()) + .withDerivePartition(ToUnknown()) .withInputPorts(operatorInfo.inputPorts) .withOutputPorts(operatorInfo.outputPorts) .withPartitionRequirement(partitionRequirement) diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/python/DualInputPortsPythonUDFOpDescV2.scala 
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/python/DualInputPortsPythonUDFOpDescV2.scala index 1cfa29c9f1e..772c2616bce 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/python/DualInputPortsPythonUDFOpDescV2.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/python/DualInputPortsPythonUDFOpDescV2.scala @@ -127,7 +127,7 @@ class DualInputPortsPythonUDFOpDescV2 extends LogicalOp { .withParallelizable(false) } physicalOp - .withDerivePartition(_ => UnknownPartition()) + .withDerivePartition(ToUnknown()) .withInputPorts(operatorInfo.inputPorts) .withOutputPorts(operatorInfo.outputPorts) .withPropagateSchema( diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/python/PythonUDFOpDescV2.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/python/PythonUDFOpDescV2.scala index 6739041a539..7ec57fcfadb 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/python/PythonUDFOpDescV2.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/python/PythonUDFOpDescV2.scala @@ -157,7 +157,7 @@ class PythonUDFOpDescV2 extends LogicalOp { } physicalOp - .withDerivePartition(_ => UnknownPartition()) + .withDerivePartition(ToUnknown()) .withInputPorts(operatorInfo.inputPorts) .withOutputPorts(operatorInfo.outputPorts) .withPartitionRequirement(partitionRequirement) diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/r/RUDFOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/r/RUDFOpDesc.scala index 577924d80f4..6d7d2d59035 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/r/RUDFOpDesc.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/udf/r/RUDFOpDesc.scala @@ -125,7 +125,7 @@ class 
RUDFOpDesc extends LogicalOp { OpExecWithCode(code, r_operator_type) ) .withParallelizable(false) - }.withDerivePartition(_ => UnknownPartition()) + }.withDerivePartition(ToUnknown()) .withInputPorts(operatorInfo.inputPorts) .withOutputPorts(operatorInfo.outputPorts) .withPartitionRequirement(partitionRequirement) diff --git a/workflow-compiling-service/src/main/scala/org/apache/texera/service/WorkflowCompilingService.scala b/workflow-compiling-service/src/main/scala/org/apache/texera/service/WorkflowCompilingService.scala index 40fb3a2dd8f..bbe215a5dbd 100644 --- a/workflow-compiling-service/src/main/scala/org/apache/texera/service/WorkflowCompilingService.scala +++ b/workflow-compiling-service/src/main/scala/org/apache/texera/service/WorkflowCompilingService.scala @@ -19,12 +19,13 @@ package org.apache.texera.service +import com.fasterxml.jackson.databind.DeserializationFeature import com.fasterxml.jackson.module.scala.DefaultScalaModule import io.dropwizard.configuration.{EnvironmentVariableSubstitutor, SubstitutingSourceProvider} import io.dropwizard.core.Application import io.dropwizard.core.setup.{Bootstrap, Environment} import org.apache.texera.amber.config.StorageConfig -import org.apache.texera.amber.util.ObjectMapperUtils +import org.apache.texera.amber.util.{ObjectMapperUtils, PhysicalPlanSerdeModule} import org.apache.texera.dao.SqlServer import org.apache.texera.service.resource.{HealthCheckResource, WorkflowCompilationResource} import org.eclipse.jetty.servlet.FilterHolder @@ -42,6 +43,15 @@ class WorkflowCompilingService extends Application[WorkflowCompilingServiceConfi ) // register scala module to dropwizard default object mapper bootstrap.getObjectMapper.registerModule(DefaultScalaModule) + // A logical plan may reach /compile from clients that re-serialize a fully-typed LogicalOp + // (e.g. ComputingUnitMaster forwarding a plan via JSONUtils.objectMapper), which emits the + // operator's derived getters in addition to its config. 
Those extra properties are not + // constructor fields here, so tolerate unknown properties rather than rejecting the request. + bootstrap.getObjectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + // also register the PhysicalPlan serializers so the /compile response (which now contains a + // PhysicalPlan with custom-serialized PortIdentity keys, OpExecInitInfo, and OutputMode) can be + // serialized byte-for-byte compatibly with JSONUtils.objectMapper on the consumer side. + PhysicalPlanSerdeModule.register(bootstrap.getObjectMapper) } override def run( diff --git a/workflow-compiling-service/src/main/scala/org/apache/texera/service/resource/WorkflowCompilationResource.scala b/workflow-compiling-service/src/main/scala/org/apache/texera/service/resource/WorkflowCompilationResource.scala index 501498b1d5c..da859151a4d 100644 --- a/workflow-compiling-service/src/main/scala/org/apache/texera/service/resource/WorkflowCompilationResource.scala +++ b/workflow-compiling-service/src/main/scala/org/apache/texera/service/resource/WorkflowCompilationResource.scala @@ -23,11 +23,11 @@ import com.fasterxml.jackson.annotation.{JsonSubTypes, JsonTypeInfo} import com.typesafe.scalalogging.LazyLogging import jakarta.annotation.security.RolesAllowed import jakarta.ws.rs.core.MediaType -import jakarta.ws.rs.{Consumes, POST, Path, Produces} +import jakarta.ws.rs.{Consumes, POST, Path, Produces, QueryParam} import org.apache.texera.amber.compiler.WorkflowCompiler import org.apache.texera.amber.compiler.model.LogicalPlanPojo import org.apache.texera.amber.core.tuple.Attribute -import org.apache.texera.amber.core.virtualidentity.WorkflowIdentity +import org.apache.texera.amber.core.virtualidentity.{ExecutionIdentity, WorkflowIdentity} import org.apache.texera.amber.core.workflow.{PhysicalPlan, WorkflowContext} import org.apache.texera.amber.core.workflowruntimestate.WorkflowFatalError import org.apache.texera.amber.util.serde.PortIdentityKeySerializer @@ 
-63,10 +63,21 @@ class WorkflowCompilationResource extends LazyLogging { @POST @Path("") def compileWorkflow( - logicalPlanPojo: LogicalPlanPojo + logicalPlanPojo: LogicalPlanPojo, + @QueryParam("workflowId") workflowId: java.lang.Long, + @QueryParam("executionId") executionId: java.lang.Long ): WorkflowCompilationResponse = { - // a placeholder workflow context, as compiling a workflow doesn't require a wid from the frontend - val context = new WorkflowContext(workflowId = WorkflowIdentity(0)) + // The frontend compiles for editing and passes no ids, so default to a placeholder context. + // A computing unit compiling before execution passes the real workflow/execution ids, so the + // resulting physical plan (e.g. OpExecSource.workflowIdentity, result-storage URIs) is built + // against the actual identities rather than the placeholder. + val context = new WorkflowContext( + workflowId = + if (workflowId != null) WorkflowIdentity(workflowId.longValue()) else WorkflowIdentity(0), + executionId = + if (executionId != null) ExecutionIdentity(executionId.longValue()) + else WorkflowContext.DEFAULT_EXECUTION_ID + ) // Compile the pojo using WorkflowCompiler val compilationResult = new WorkflowCompiler(context).compile(logicalPlanPojo) From c60ce7f3135e8e002f7b9254c12fb4cfefcc48c6 Mon Sep 17 00:00:00 2001 From: Bob Bai Date: Sun, 31 May 2026 00:42:59 -0700 Subject: [PATCH 3/4] refactor: share logical model + plan expansion between the two compilers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amber and the workflow-compiling-service each carried a near-duplicate WorkflowCompiler and logical-plan model (LogicalPlan, LogicalPlanPojo, LogicalLink) — the duplication was even flagged in-code ("we should consider merge this compile with WorkflowCompilingService's"). 
Move the canonical pieces into the shared workflow-operator module: - org.apache.texera.amber.compiler.model.{LogicalLink, LogicalPlan, LogicalPlanPojo} now live in workflow-operator as the single copy (the LogicalLink string serializer from #5042 is preserved). The duplicates in amber and workflow-compiling-service are deleted. - PhysicalPlanExpander.expand holds the logical-to-physical expansion the two compilers shared. - Each compiler is now a thin wrapper over it: amber's adds result-storage planning and builds a runtime Workflow (carrying the logical plan) for execution; the compiling-service's adds output-schema collection and error reporting for the editor. amber's references migrate to the shared model package. No behavior change: the logical-link round-trip, compiler-produced physical-plan serde, and storage-port collection specs all pass. --- .../architecture/controller/Workflow.scala | 2 +- .../EditingTimeCompilationRequest.scala | 2 +- .../request/WorkflowExecuteRequest.scala | 10 +- .../web/resource/SyncExecutionResource.scala | 5 +- .../web/service/CompilingServiceClient.scala | 2 +- .../texera/web/service/WorkflowService.scala | 2 +- .../apache/texera/workflow/LogicalPlan.scala | 124 --------------- .../texera/workflow/WorkflowCompiler.scala | 141 ++++-------------- .../e2e/ReconfigurationIntegrationSpec.scala | 2 +- .../controller/WorkflowSchedulerSpec.scala | 2 +- .../CostBasedScheduleGeneratorSpec.scala | 2 +- .../scheduling/DefaultCostEstimatorSpec.scala | 2 +- ...ExpansionGreedyScheduleGeneratorSpec.scala | 2 +- .../ResourcePoliciesSpec.scala | 2 +- .../engine/e2e/BatchSizePropagationSpec.scala | 2 +- .../amber/engine/e2e/DataProcessingSpec.scala | 2 +- .../texera/amber/engine/e2e/PauseSpec.scala | 2 +- .../engine/e2e/ReconfigurationSpec.scala | 2 +- .../texera/amber/engine/e2e/TestUtils.scala | 5 +- .../faulttolerance/CheckpointSpec.scala | 2 +- .../texera/workflow/LogicalLinkSpec.scala | 1 + .../PhysicalPlanCompiledRoundTripSpec.scala | 2 +- 
.../PhysicalPlanRoundTripThoroughSpec.scala | 2 +- .../workflow/WorkflowCompilerSpec.scala | 2 +- .../amber/compiler/PhysicalPlanExpander.scala | 111 ++++++++++++++ .../amber/compiler/model}/LogicalLink.scala | 2 +- .../amber/compiler/model/LogicalPlan.scala | 18 ++- .../compiler/model/LogicalPlanPojo.scala | 0 .../amber/compiler/WorkflowCompiler.scala | 78 +--------- .../amber/compiler/model/LogicalLink.scala | 41 ----- 30 files changed, 184 insertions(+), 388 deletions(-) delete mode 100644 amber/src/main/scala/org/apache/texera/workflow/LogicalPlan.scala create mode 100644 common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/PhysicalPlanExpander.scala rename {amber/src/main/scala/org/apache/texera/workflow => common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/model}/LogicalLink.scala (98%) rename {workflow-compiling-service => common/workflow-operator}/src/main/scala/org/apache/texera/amber/compiler/model/LogicalPlan.scala (90%) rename {workflow-compiling-service => common/workflow-operator}/src/main/scala/org/apache/texera/amber/compiler/model/LogicalPlanPojo.scala (100%) delete mode 100644 workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/model/LogicalLink.scala diff --git a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/Workflow.scala b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/Workflow.scala index 6644a6c5a6d..4c787dcd8bf 100644 --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/Workflow.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/Workflow.scala @@ -20,7 +20,7 @@ package org.apache.texera.amber.engine.architecture.controller import org.apache.texera.amber.core.workflow.{PhysicalPlan, WorkflowContext} -import org.apache.texera.workflow.LogicalPlan +import org.apache.texera.amber.compiler.model.LogicalPlan case class Workflow( context: WorkflowContext, diff 
--git a/amber/src/main/scala/org/apache/texera/web/model/websocket/request/EditingTimeCompilationRequest.scala b/amber/src/main/scala/org/apache/texera/web/model/websocket/request/EditingTimeCompilationRequest.scala index e15b441fcae..4809b7a0196 100644 --- a/amber/src/main/scala/org/apache/texera/web/model/websocket/request/EditingTimeCompilationRequest.scala +++ b/amber/src/main/scala/org/apache/texera/web/model/websocket/request/EditingTimeCompilationRequest.scala @@ -19,8 +19,8 @@ package org.apache.texera.web.model.websocket.request +import org.apache.texera.amber.compiler.model.{LogicalLink, LogicalPlanPojo} import org.apache.texera.amber.operator.LogicalOp -import org.apache.texera.workflow.LogicalLink case class EditingTimeCompilationRequest( operators: List[LogicalOp], diff --git a/amber/src/main/scala/org/apache/texera/web/model/websocket/request/WorkflowExecuteRequest.scala b/amber/src/main/scala/org/apache/texera/web/model/websocket/request/WorkflowExecuteRequest.scala index a346a1ec0a4..084537b1551 100644 --- a/amber/src/main/scala/org/apache/texera/web/model/websocket/request/WorkflowExecuteRequest.scala +++ b/amber/src/main/scala/org/apache/texera/web/model/websocket/request/WorkflowExecuteRequest.scala @@ -20,9 +20,8 @@ package org.apache.texera.web.model.websocket.request import com.fasterxml.jackson.databind.annotation.JsonDeserialize +import org.apache.texera.amber.compiler.model.LogicalPlanPojo import org.apache.texera.amber.core.workflow.WorkflowSettings -import org.apache.texera.amber.operator.LogicalOp -import org.apache.texera.workflow.LogicalLink case class ReplayExecutionInfo( @JsonDeserialize(contentAs = classOf[java.lang.Long]) @@ -39,10 +38,3 @@ case class WorkflowExecuteRequest( emailNotificationEnabled: Boolean, computingUnitId: Int ) extends TexeraWebSocketRequest - -case class LogicalPlanPojo( - operators: List[LogicalOp], - links: List[LogicalLink], - opsToViewResult: List[String], - opsToReuseResult: List[String] -) diff --git 
a/amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala index 359b2929c27..6a5554d4817 100644 --- a/amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala +++ b/amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala @@ -48,8 +48,9 @@ import io.reactivex.rxjava3.core.Observable import org.apache.texera.auth.SessionUser import org.apache.texera.dao.SqlServer import org.apache.texera.dao.jooq.generated.Tables.OPERATOR_EXECUTIONS -import org.apache.texera.web.model.websocket.request.{LogicalPlanPojo, WorkflowExecuteRequest} -import org.apache.texera.workflow.LogicalLink +import org.apache.texera.amber.compiler.model.LogicalPlanPojo +import org.apache.texera.web.model.websocket.request.WorkflowExecuteRequest +import org.apache.texera.amber.compiler.model.LogicalLink import org.apache.texera.web.resource.dashboard.user.workflow.WorkflowExecutionsResource import org.apache.texera.web.service.{ExecutionResultService, WorkflowService} import org.apache.texera.web.storage.ExecutionStateStore.updateWorkflowState diff --git a/amber/src/main/scala/org/apache/texera/web/service/CompilingServiceClient.scala b/amber/src/main/scala/org/apache/texera/web/service/CompilingServiceClient.scala index ba7022b90f2..bb21c543625 100644 --- a/amber/src/main/scala/org/apache/texera/web/service/CompilingServiceClient.scala +++ b/amber/src/main/scala/org/apache/texera/web/service/CompilingServiceClient.scala @@ -23,7 +23,7 @@ import com.fasterxml.jackson.databind.JsonNode import org.apache.texera.amber.config.EnvironmentalVariable import org.apache.texera.amber.core.workflow.PhysicalPlan import org.apache.texera.amber.util.JSONUtils.objectMapper -import org.apache.texera.web.model.websocket.request.LogicalPlanPojo +import org.apache.texera.amber.compiler.model.LogicalPlanPojo import java.net.{HttpURLConnection, URL} import 
java.nio.charset.StandardCharsets diff --git a/amber/src/main/scala/org/apache/texera/web/service/WorkflowService.scala b/amber/src/main/scala/org/apache/texera/web/service/WorkflowService.scala index 90d4a20edf8..7d76647ccee 100644 --- a/amber/src/main/scala/org/apache/texera/web/service/WorkflowService.scala +++ b/amber/src/main/scala/org/apache/texera/web/service/WorkflowService.scala @@ -57,7 +57,7 @@ import org.apache.texera.web.service.WorkflowService.mkWorkflowStateId import org.apache.texera.web.storage.ExecutionStateStore.updateWorkflowState import org.apache.texera.web.storage.{ExecutionStateStore, WorkflowStateStore} import org.apache.texera.web.{SubscriptionManager, WorkflowLifecycleManager} -import org.apache.texera.workflow.LogicalPlan +import org.apache.texera.amber.compiler.model.LogicalPlan import play.api.libs.json.Json import java.net.URI diff --git a/amber/src/main/scala/org/apache/texera/workflow/LogicalPlan.scala b/amber/src/main/scala/org/apache/texera/workflow/LogicalPlan.scala deleted file mode 100644 index 974d17f40a4..00000000000 --- a/amber/src/main/scala/org/apache/texera/workflow/LogicalPlan.scala +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.texera.workflow - -import com.typesafe.scalalogging.LazyLogging -import org.apache.texera.amber.core.storage.FileResolver -import org.apache.texera.amber.core.virtualidentity.OperatorIdentity -import org.apache.texera.amber.operator.LogicalOp -import org.apache.texera.amber.operator.source.scan.ScanSourceOpDesc -import org.apache.texera.web.model.websocket.request.LogicalPlanPojo -import org.jgrapht.graph.DirectedAcyclicGraph -import org.jgrapht.util.SupplierUtil - -import java.util -import scala.collection.mutable.ArrayBuffer -import scala.util.{Failure, Success, Try} - -object LogicalPlan { - - private def toJgraphtDAG( - operatorList: List[LogicalOp], - links: List[LogicalLink] - ): DirectedAcyclicGraph[OperatorIdentity, LogicalLink] = { - val workflowDag = - new DirectedAcyclicGraph[OperatorIdentity, LogicalLink]( - null, // vertexSupplier - SupplierUtil.createSupplier(classOf[LogicalLink]), // edgeSupplier - false, // weighted - true // allowMultipleEdges - ) - operatorList.foreach(op => workflowDag.addVertex(op.operatorIdentifier)) - links.foreach(l => - workflowDag.addEdge( - l.fromOpId, - l.toOpId, - l - ) - ) - workflowDag - } - - def apply( - pojo: LogicalPlanPojo - ): LogicalPlan = { - LogicalPlan(pojo.operators, pojo.links) - } -} - -case class LogicalPlan( - operators: List[LogicalOp], - links: List[LogicalLink] -) extends LazyLogging { - - private lazy val operatorMap: Map[OperatorIdentity, LogicalOp] = - operators.map(op => (op.operatorIdentifier, op)).toMap - - private lazy val jgraphtDag: DirectedAcyclicGraph[OperatorIdentity, LogicalLink] = - LogicalPlan.toJgraphtDAG(operators, links) - - def getTopologicalOpIds: util.Iterator[OperatorIdentity] = jgraphtDag.iterator() - - def getOperator(opId: OperatorIdentity): LogicalOp = operatorMap(opId) - - def getTerminalOperatorIds: List[OperatorIdentity] = - operatorMap.keys - .filter(op => jgraphtDag.outDegreeOf(op) == 0) - .toList - - def getUpstreamLinks(opId: 
OperatorIdentity): List[LogicalLink] = { - links.filter(l => l.toOpId == opId) - } - - /** - * Resolve all user-given filename for the scan source operators to URIs, and call op.setFileUri to set the URi - * - * @param errorList if given, put errors during resolving to it - */ - def resolveScanSourceOpFileName( - errorList: Option[ArrayBuffer[(OperatorIdentity, Throwable)]] - ): Unit = { - operators.foreach { - case operator @ (scanOp: ScanSourceOpDesc) => - Try { - // Resolve file path for ScanSourceOpDesc - val fileName = scanOp.fileName.getOrElse(throw new RuntimeException("no input file name")) - val fileUri = FileResolver.resolve(fileName) // Convert to URI - - // Set the URI in the ScanSourceOpDesc - scanOp.setResolvedFileName(fileUri) - } match { - case Success(_) => // Successfully resolved and set the file URI - - case Failure(err) => - logger.error("Error resolving file path for ScanSourceOpDesc", err) - errorList match { - case Some(errList) => - errList.append((operator.operatorIdentifier, err)) - case None => - // Throw the error if no errorList is provided - throw err - } - } - - case _ => // Skip non-ScanSourceOpDesc operators - } - } -} diff --git a/amber/src/main/scala/org/apache/texera/workflow/WorkflowCompiler.scala b/amber/src/main/scala/org/apache/texera/workflow/WorkflowCompiler.scala index d5bd64ad886..1017fcb4a33 100644 --- a/amber/src/main/scala/org/apache/texera/workflow/WorkflowCompiler.scala +++ b/amber/src/main/scala/org/apache/texera/workflow/WorkflowCompiler.scala @@ -20,136 +20,51 @@ package org.apache.texera.workflow import com.typesafe.scalalogging.LazyLogging +import org.apache.texera.amber.compiler.PhysicalPlanExpander +import org.apache.texera.amber.compiler.model.{LogicalPlan, LogicalPlanPojo} import org.apache.texera.amber.core.virtualidentity.OperatorIdentity -import org.apache.texera.amber.core.workflow._ +import org.apache.texera.amber.core.workflow.{GlobalPortIdentity, WorkflowContext} import 
org.apache.texera.amber.engine.architecture.controller.Workflow -import org.apache.texera.web.model.websocket.request.LogicalPlanPojo - -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer -import scala.jdk.CollectionConverters.IteratorHasAsScala -import scala.util.{Failure, Success, Try} +/** + * The in-JVM compiler used right before execution. It shares the logical-to-physical expansion with + * the workflow-compiling-service's compiler (both call [[PhysicalPlanExpander]]); on top of that it + * computes the output ports that need result storage and wraps the plan in a runtime [[Workflow]] + * that still carries the logical plan. The compiling-service's compiler instead reports schemas and + * errors for the editor. + */ class WorkflowCompiler( context: WorkflowContext ) extends LazyLogging { /** - * Function to expand logical plan to physical plan - * @return the expanded physical plan and a set of output ports that need storage - */ - private def expandLogicalPlan( - logicalPlan: LogicalPlan, - logicalOpsToViewResult: List[String], - errorList: Option[ArrayBuffer[(OperatorIdentity, Throwable)]] - ): (PhysicalPlan, Set[GlobalPortIdentity]) = { - val terminalLogicalOps = logicalPlan.getTerminalOperatorIds - val logicalOpsNeedingStorage = - (terminalLogicalOps ++ logicalOpsToViewResult.map(OperatorIdentity(_))).toSet - var physicalPlan = PhysicalPlan(operators = Set.empty, links = Set.empty) - val outputPortsNeedingStorage: mutable.HashSet[GlobalPortIdentity] = mutable.HashSet() - - logicalPlan.getTopologicalOpIds.asScala.foreach(logicalOpId => - Try { - val logicalOp = logicalPlan.getOperator(logicalOpId) - - val subPlan = logicalOp.getPhysicalPlan(context.workflowId, context.executionId) - subPlan - .topologicalIterator() - .map(subPlan.getOperator) - .foreach({ physicalOp => - { - val externalLinks = logicalPlan - .getUpstreamLinks(logicalOp.operatorIdentifier) - .filter(link => physicalOp.inputPorts.contains(link.toPortId)) - .flatMap 
{ link => - physicalPlan - .getPhysicalOpsOfLogicalOp(link.fromOpId) - .find(_.outputPorts.contains(link.fromPortId)) - .map(fromOp => - PhysicalLink(fromOp.id, link.fromPortId, physicalOp.id, link.toPortId) - ) - } - - val internalLinks = subPlan.getUpstreamPhysicalLinks(physicalOp.id) - - // Add the operator to the physical plan - physicalPlan = physicalPlan.addOperator(physicalOp.propagateSchema()) - - // Add all the links to the physical plan - physicalPlan = (externalLinks ++ internalLinks) - .foldLeft(physicalPlan) { (plan, link) => plan.addLink(link) } - - // **Check for Python-based operator errors during code generation** - if (physicalOp.isPythonBased) { - val code = physicalOp.getCode - val exceptionPattern = """#EXCEPTION DURING CODE GENERATION:\s*(.*)""".r - - exceptionPattern.findFirstMatchIn(code).foreach { matchResult => - val errorMessage = matchResult.group(1).trim - val error = - new RuntimeException(s"Operator is not configured properly: $errorMessage") - - errorList match { - case Some(list) => list.append((logicalOpId, error)) // Store error and continue - case None => throw error // Throw immediately if no error list is provided - } - } - } - } - }) - - // convert logical operators needing storage to output ports needing storage - subPlan - .topologicalIterator() - .filter(opId => logicalOpsNeedingStorage.contains(opId.logicalOpId)) - .map(physicalPlan.getOperator) - .foreach { physicalOp => - physicalOp.outputPorts - .filterNot(_._1.internal) - .foreach { - case (outputPortId, _) => - outputPortsNeedingStorage += GlobalPortIdentity( - opId = physicalOp.id, - portId = outputPortId - ) - } - } - } match { - case Success(_) => - - case Failure(err) => - errorList match { - case Some(list) => list.append((logicalOpId, err)) - case None => throw err - } - } - ) - (physicalPlan, outputPortsNeedingStorage.toSet) - } - - /** - * Compile a workflow to physical plan, along with the schema propagation result and error(if any) - * - * Comparing to 
WorkflowCompilingService's compiler, which is used solely for workflow editing, - * This compile is used before executing the workflow. + * Compile a workflow to a runnable [[Workflow]] (physical plan + logical plan + context). * - * TODO: we should consider merge this compile with WorkflowCompilingService's compile - * @param logicalPlanPojo the pojo parsed from workflow str provided by user - * @return Workflow, containing the physical plan, logical plan and workflow context + * @param logicalPlanPojo the pojo parsed from the workflow string provided by the user */ def compile( logicalPlanPojo: LogicalPlanPojo ): Workflow = { - // 1. convert the pojo to logical plan + // 1. convert the pojo to a logical plan val logicalPlan: LogicalPlan = LogicalPlan(logicalPlanPojo) - // 2. resolve the file name in each scan source operator + // 2. resolve the file name in each scan source operator (throws on failure: no error list) logicalPlan.resolveScanSourceOpFileName(None) - // 3. expand the logical plan to the physical plan, and get a set of output ports that need storage - val (physicalPlan, outputPortsNeedingStorage) = - expandLogicalPlan(logicalPlan, logicalPlanPojo.opsToViewResult, None) + // 3. expand the logical plan into a physical plan (shared with the compiling-service compiler) + val physicalPlan = PhysicalPlanExpander.expand(context, logicalPlan, None) + + // 4. 
mark the output ports of terminal / to-view operators as needing result storage + val logicalOpsNeedingStorage = + (logicalPlan.getTerminalOperatorIds ++ logicalPlanPojo.opsToViewResult.map(OperatorIdentity(_))).toSet + val outputPortsNeedingStorage: Set[GlobalPortIdentity] = physicalPlan.operators + .filter(physicalOp => logicalOpsNeedingStorage.contains(physicalOp.id.logicalOpId)) + .flatMap { physicalOp => + physicalOp.outputPorts.keys + .filterNot(_.internal) + .map(portId => GlobalPortIdentity(opId = physicalOp.id, portId = portId)) + } + .toSet context.workflowSettings = context.workflowSettings.copy( outputPortsNeedingStorage = outputPortsNeedingStorage diff --git a/amber/src/test/integration/org/apache/texera/amber/engine/e2e/ReconfigurationIntegrationSpec.scala b/amber/src/test/integration/org/apache/texera/amber/engine/e2e/ReconfigurationIntegrationSpec.scala index 6f0936da287..147e19415a6 100644 --- a/amber/src/test/integration/org/apache/texera/amber/engine/e2e/ReconfigurationIntegrationSpec.scala +++ b/amber/src/test/integration/org/apache/texera/amber/engine/e2e/ReconfigurationIntegrationSpec.scala @@ -46,7 +46,7 @@ import org.apache.texera.amber.engine.e2e.TestUtils.{ import org.apache.texera.amber.operator.source.scan.text.TextInputSourceOpDesc import org.apache.texera.amber.operator.{LogicalOp, TestOperators} import org.apache.texera.amber.tags.IntegrationTest -import org.apache.texera.workflow.LogicalLink +import org.apache.texera.amber.compiler.model.LogicalLink import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, Outcome, Retries} import org.scalatest.flatspec.AnyFlatSpecLike diff --git a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/controller/WorkflowSchedulerSpec.scala b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/controller/WorkflowSchedulerSpec.scala index ac7358b438f..23ceda511c4 100644 --- 
a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/controller/WorkflowSchedulerSpec.scala +++ b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/controller/WorkflowSchedulerSpec.scala @@ -23,7 +23,7 @@ import org.apache.texera.amber.core.workflow.{PortIdentity, WorkflowContext} import org.apache.texera.amber.engine.common.virtualidentity.util.CONTROLLER import org.apache.texera.amber.engine.e2e.TestUtils.buildWorkflow import org.apache.texera.amber.operator.TestOperators -import org.apache.texera.workflow.LogicalLink +import org.apache.texera.amber.compiler.model.LogicalLink import org.scalatest.flatspec.AnyFlatSpec class WorkflowSchedulerSpec extends AnyFlatSpec { diff --git a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/CostBasedScheduleGeneratorSpec.scala b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/CostBasedScheduleGeneratorSpec.scala index 7d5227c36bc..644031d5b3e 100644 --- a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/CostBasedScheduleGeneratorSpec.scala +++ b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/CostBasedScheduleGeneratorSpec.scala @@ -28,7 +28,7 @@ import org.apache.texera.amber.core.workflow.{ import org.apache.texera.amber.engine.common.virtualidentity.util.CONTROLLER import org.apache.texera.amber.engine.e2e.TestUtils.buildWorkflow import org.apache.texera.amber.operator.TestOperators -import org.apache.texera.workflow.LogicalLink +import org.apache.texera.amber.compiler.model.LogicalLink import org.scalamock.scalatest.MockFactory import org.scalatest.flatspec.AnyFlatSpec diff --git a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/DefaultCostEstimatorSpec.scala b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/DefaultCostEstimatorSpec.scala index d1b2595cbb5..8aa7aea4aad 100644 --- 
a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/DefaultCostEstimatorSpec.scala +++ b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/DefaultCostEstimatorSpec.scala @@ -39,7 +39,7 @@ import org.apache.texera.dao.MockTexeraDB import org.apache.texera.dao.jooq.generated.enums.UserRoleEnum import org.apache.texera.dao.jooq.generated.tables.daos._ import org.apache.texera.dao.jooq.generated.tables.pojos._ -import org.apache.texera.workflow.LogicalLink +import org.apache.texera.amber.compiler.model.LogicalLink import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} diff --git a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/ExpansionGreedyScheduleGeneratorSpec.scala b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/ExpansionGreedyScheduleGeneratorSpec.scala index e720b9c6cb5..08a0d51f434 100644 --- a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/ExpansionGreedyScheduleGeneratorSpec.scala +++ b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/ExpansionGreedyScheduleGeneratorSpec.scala @@ -28,7 +28,7 @@ import org.apache.texera.amber.operator.udf.python.{ DualInputPortsPythonUDFOpDescV2, PythonUDFOpDescV2 } -import org.apache.texera.workflow.LogicalLink +import org.apache.texera.amber.compiler.model.LogicalLink import org.scalamock.scalatest.MockFactory import org.scalatest.flatspec.AnyFlatSpec diff --git a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/resourcePolicies/ResourcePoliciesSpec.scala b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/resourcePolicies/ResourcePoliciesSpec.scala index 63a08e899fb..302c5659bf6 100644 --- a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/resourcePolicies/ResourcePoliciesSpec.scala +++ 
b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/scheduling/resourcePolicies/ResourcePoliciesSpec.scala @@ -31,7 +31,7 @@ import org.apache.texera.amber.engine.architecture.sendsemantics.partitionings.{ } import org.apache.texera.amber.engine.e2e.TestUtils.buildWorkflow import org.apache.texera.amber.operator.TestOperators -import org.apache.texera.workflow.LogicalLink +import org.apache.texera.amber.compiler.model.LogicalLink import org.scalatest.flatspec.AnyFlatSpec class ResourcePoliciesSpec extends AnyFlatSpec { diff --git a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/BatchSizePropagationSpec.scala b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/BatchSizePropagationSpec.scala index e9b830bdfdc..464fe859f28 100644 --- a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/BatchSizePropagationSpec.scala +++ b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/BatchSizePropagationSpec.scala @@ -30,7 +30,7 @@ import org.apache.texera.amber.engine.common.virtualidentity.util.CONTROLLER import org.apache.texera.amber.engine.e2e.TestUtils.buildWorkflow import org.apache.texera.amber.operator.TestOperators import org.apache.texera.amber.operator.aggregate.AggregationFunction -import org.apache.texera.workflow.LogicalLink +import org.apache.texera.amber.compiler.model.LogicalLink import org.scalatest.flatspec.AnyFlatSpecLike import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} diff --git a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/DataProcessingSpec.scala b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/DataProcessingSpec.scala index cd9ed4248ea..f91846c5a02 100644 --- a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/DataProcessingSpec.scala +++ b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/DataProcessingSpec.scala @@ -48,7 +48,7 @@ import org.apache.texera.amber.engine.e2e.TestUtils.{ import org.apache.texera.amber.operator.TestOperators import 
org.apache.texera.amber.operator.aggregate.AggregationFunction import org.apache.texera.web.resource.dashboard.user.workflow.WorkflowExecutionsResource.getResultUriByLogicalPortId -import org.apache.texera.workflow.LogicalLink +import org.apache.texera.amber.compiler.model.LogicalLink import org.scalatest.flatspec.AnyFlatSpecLike import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, Outcome, Retries} diff --git a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/PauseSpec.scala b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/PauseSpec.scala index 2cc268608f1..2f24a5457b3 100644 --- a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/PauseSpec.scala +++ b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/PauseSpec.scala @@ -44,7 +44,7 @@ import org.apache.texera.amber.engine.e2e.TestUtils.{ stateReached } import org.apache.texera.amber.operator.{LogicalOp, TestOperators} -import org.apache.texera.workflow.LogicalLink +import org.apache.texera.amber.compiler.model.LogicalLink import org.scalatest.flatspec.AnyFlatSpecLike import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, Outcome, Retries} diff --git a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/ReconfigurationSpec.scala b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/ReconfigurationSpec.scala index 2cd3559736e..7ef63c2ae38 100644 --- a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/ReconfigurationSpec.scala +++ b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/ReconfigurationSpec.scala @@ -35,7 +35,7 @@ import org.apache.texera.amber.engine.e2e.TestUtils.{ setUpWorkflowExecutionData } import org.apache.texera.amber.operator.{LogicalOp, TestOperators} -import org.apache.texera.workflow.LogicalLink +import org.apache.texera.amber.compiler.model.LogicalLink import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, Outcome, Retries} import org.scalatest.flatspec.AnyFlatSpecLike diff --git 
a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/TestUtils.scala b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/TestUtils.scala index 6be8f15a4be..ee8397232dc 100644 --- a/amber/src/test/scala/org/apache/texera/amber/engine/e2e/TestUtils.scala +++ b/amber/src/test/scala/org/apache/texera/amber/engine/e2e/TestUtils.scala @@ -59,9 +59,10 @@ import org.apache.texera.dao.jooq.generated.tables.pojos.{ WorkflowVersion, Workflow => WorkflowPojo } -import org.apache.texera.web.model.websocket.request.LogicalPlanPojo +import org.apache.texera.amber.compiler.model.LogicalPlanPojo import org.apache.texera.web.resource.dashboard.user.workflow.WorkflowExecutionsResource.getResultUriByLogicalPortId -import org.apache.texera.workflow.{LogicalLink, WorkflowCompiler} +import org.apache.texera.amber.compiler.model.LogicalLink +import org.apache.texera.workflow.WorkflowCompiler object TestUtils { diff --git a/amber/src/test/scala/org/apache/texera/amber/engine/faulttolerance/CheckpointSpec.scala b/amber/src/test/scala/org/apache/texera/amber/engine/faulttolerance/CheckpointSpec.scala index 3d207fd23b3..79dd730f0d7 100644 --- a/amber/src/test/scala/org/apache/texera/amber/engine/faulttolerance/CheckpointSpec.scala +++ b/amber/src/test/scala/org/apache/texera/amber/engine/faulttolerance/CheckpointSpec.scala @@ -33,7 +33,7 @@ import org.apache.texera.amber.engine.common.virtualidentity.util.{CONTROLLER, S import org.apache.texera.amber.engine.common.{AmberRuntime, CheckpointState} import org.apache.texera.amber.engine.e2e.TestUtils.buildWorkflow import org.apache.texera.amber.operator.TestOperators -import org.apache.texera.workflow.LogicalLink +import org.apache.texera.amber.compiler.model.LogicalLink import org.scalatest.BeforeAndAfterAll import org.scalatest.flatspec.AnyFlatSpecLike diff --git a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala index 
fa82c8ef752..791c762b711 100644 --- a/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala +++ b/amber/src/test/scala/org/apache/texera/workflow/LogicalLinkSpec.scala @@ -21,6 +21,7 @@ package org.apache.texera.workflow import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.exc.ValueInstantiationException +import org.apache.texera.amber.compiler.model.LogicalLink import org.apache.texera.amber.core.virtualidentity.OperatorIdentity import org.apache.texera.amber.core.workflow.PortIdentity import org.apache.texera.amber.util.JSONUtils.objectMapper diff --git a/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanCompiledRoundTripSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanCompiledRoundTripSpec.scala index f36936bc445..d254843cc2b 100644 --- a/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanCompiledRoundTripSpec.scala +++ b/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanCompiledRoundTripSpec.scala @@ -24,7 +24,7 @@ import org.apache.texera.amber.core.workflow.{PartitionInfo, PhysicalPlan, PortI import org.apache.texera.amber.operator.TestOperators import org.apache.texera.amber.operator.aggregate.AggregationFunction import org.apache.texera.amber.util.JSONUtils.objectMapper -import org.apache.texera.web.model.websocket.request.LogicalPlanPojo +import org.apache.texera.amber.compiler.model.{LogicalLink, LogicalPlanPojo} import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers diff --git a/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanRoundTripThoroughSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanRoundTripThoroughSpec.scala index 6ae35f071ea..8e795ae7f26 100644 --- a/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanRoundTripThoroughSpec.scala +++ b/amber/src/test/scala/org/apache/texera/workflow/PhysicalPlanRoundTripThoroughSpec.scala @@ -33,7 +33,7 @@ import 
org.apache.texera.amber.core.workflow.{ import org.apache.texera.amber.operator.TestOperators import org.apache.texera.amber.operator.aggregate.AggregationFunction import org.apache.texera.amber.util.JSONUtils.objectMapper -import org.apache.texera.web.model.websocket.request.LogicalPlanPojo +import org.apache.texera.amber.compiler.model.{LogicalLink, LogicalPlanPojo} import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers diff --git a/amber/src/test/scala/org/apache/texera/workflow/WorkflowCompilerSpec.scala b/amber/src/test/scala/org/apache/texera/workflow/WorkflowCompilerSpec.scala index c96d5b14a89..1a2b8930634 100644 --- a/amber/src/test/scala/org/apache/texera/workflow/WorkflowCompilerSpec.scala +++ b/amber/src/test/scala/org/apache/texera/workflow/WorkflowCompilerSpec.scala @@ -22,7 +22,7 @@ package org.apache.texera.workflow import org.apache.texera.amber.core.workflow.{PortIdentity, WorkflowContext} import org.apache.texera.amber.operator.TestOperators import org.apache.texera.amber.operator.source.scan.csv.CSVScanSourceOpDesc -import org.apache.texera.web.model.websocket.request.LogicalPlanPojo +import org.apache.texera.amber.compiler.model.{LogicalLink, LogicalPlanPojo} import org.scalatest.flatspec.AnyFlatSpec /** diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/PhysicalPlanExpander.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/PhysicalPlanExpander.scala new file mode 100644 index 00000000000..02c7a8e768e --- /dev/null +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/PhysicalPlanExpander.scala @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.compiler + +import org.apache.texera.amber.compiler.model.LogicalPlan +import org.apache.texera.amber.core.virtualidentity.OperatorIdentity +import org.apache.texera.amber.core.workflow.{PhysicalLink, PhysicalPlan, WorkflowContext} + +import scala.collection.mutable.ArrayBuffer +import scala.jdk.CollectionConverters.IteratorHasAsScala + +/** + * The shared logical-to-physical expansion, used by both the in-JVM compiler (amber's + * [[org.apache.texera.workflow.WorkflowCompiler]], which wraps it with storage planning and a + * runtime Workflow) and the workflow-compiling-service's compiler (which wraps it with output + * schema collection and error reporting). Keeping the expansion here avoids the two compilers + * drifting apart again. + */ +object PhysicalPlanExpander { + + /** + * Expand a [[LogicalPlan]] into a [[PhysicalPlan]]: for each logical operator (in topological + * order) materialize its physical sub-plan, wire the external (cross-operator) and internal + * links, and propagate schemas. Python operators whose code generation failed surface as errors. + * + * @param errorList if given, per-operator errors are appended and expansion continues; otherwise + * the first error is thrown. 
+ */ + def expand( + context: WorkflowContext, + logicalPlan: LogicalPlan, + errorList: Option[ArrayBuffer[(OperatorIdentity, Throwable)]] + ): PhysicalPlan = { + var physicalPlan = PhysicalPlan(operators = Set.empty, links = Set.empty) + + logicalPlan.getTopologicalOpIds.asScala.foreach { logicalOpId => + val logicalOp = logicalPlan.getOperator(logicalOpId) + val allUpstreamLinks = logicalPlan.getUpstreamLinks(logicalOp.operatorIdentifier) + try { + val subPlan = logicalOp.getPhysicalPlan(context.workflowId, context.executionId) + subPlan + .topologicalIterator() + .map(subPlan.getOperator) + .foreach { physicalOp => + val externalLinks = allUpstreamLinks + .filter(link => physicalOp.inputPorts.contains(link.toPortId)) + .flatMap { link => + physicalPlan + .getPhysicalOpsOfLogicalOp(link.fromOpId) + .find(_.outputPorts.contains(link.fromPortId)) + .map(fromOp => + PhysicalLink(fromOp.id, link.fromPortId, physicalOp.id, link.toPortId) + ) + } + + val internalLinks = subPlan.getUpstreamPhysicalLinks(physicalOp.id) + + // Add the operator to the physical plan + physicalPlan = physicalPlan.addOperator(physicalOp.propagateSchema()) + + // Add all the links to the physical plan + physicalPlan = (externalLinks ++ internalLinks).foldLeft(physicalPlan) { (plan, link) => + plan.addLink(link) + } + + // Check for Python-based operator errors during code generation + if (physicalOp.isPythonBased) { + val code = physicalOp.getCode + val exceptionPattern = """#EXCEPTION DURING CODE GENERATION:\s*(.*)""".r + + exceptionPattern.findFirstMatchIn(code).foreach { matchResult => + val errorMessage = matchResult.group(1).trim + val error = + new RuntimeException(s"Operator is not configured properly: $errorMessage") + + errorList match { + case Some(list) => list.append((logicalOpId, error)) // Store error and continue + case None => throw error // Throw immediately if no error list is provided + } + } + } + } + } catch { + case err: Throwable => + errorList match { + case Some(list) 
=> list.append((logicalOpId, err)) + case None => throw err + } + } + } + + physicalPlan + } +} diff --git a/amber/src/main/scala/org/apache/texera/workflow/LogicalLink.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/model/LogicalLink.scala similarity index 98% rename from amber/src/main/scala/org/apache/texera/workflow/LogicalLink.scala rename to common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/model/LogicalLink.scala index bf284d1d56e..e116e8251a3 100644 --- a/amber/src/main/scala/org/apache/texera/workflow/LogicalLink.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/model/LogicalLink.scala @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.texera.workflow +package org.apache.texera.amber.compiler.model import com.fasterxml.jackson.annotation.{JsonCreator, JsonProperty} import com.fasterxml.jackson.core.JsonGenerator diff --git a/workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/model/LogicalPlan.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/model/LogicalPlan.scala similarity index 90% rename from workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/model/LogicalPlan.scala rename to common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/model/LogicalPlan.scala index eecb435cc87..7cdb31ae7f4 100644 --- a/workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/model/LogicalPlan.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/model/LogicalPlan.scala @@ -81,6 +81,11 @@ case class LogicalPlan( def getOperator(opId: OperatorIdentity): LogicalOp = operatorMap(opId) + def getTerminalOperatorIds: List[OperatorIdentity] = + operatorMap.keys + .filter(op => jgraphtDag.outDegreeOf(op) == 0) + .toList + def addOperator(op: LogicalOp): LogicalPlan = { // TODO: fix schema for the new operator this.copy(operators :+ op, 
links) @@ -108,7 +113,8 @@ case class LogicalPlan( /** * Resolve all user-given filename for the scan source operators to URIs, and call op.setFileUri to set the URi - * @param errorList if given, put errors during resolving to it + * + * @param errorList if given, put errors during resolving to it; otherwise the error is thrown */ def resolveScanSourceOpFileName( errorList: Option[ArrayBuffer[(OperatorIdentity, Throwable)]] @@ -124,10 +130,18 @@ case class LogicalPlan( scanOp.setResolvedFileName(fileUri) } match { case Success(_) => // Successfully resolved and set the file URI + case Failure(err) => logger.error("Error resolving file path for ScanSourceOpDesc", err) - errorList.foreach(_.append((operator.operatorIdentifier, err))) + errorList match { + case Some(errList) => + errList.append((operator.operatorIdentifier, err)) + case None => + // Throw the error if no errorList is provided + throw err + } } + case _ => // Skip non-ScanSourceOpDesc operators } } diff --git a/workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/model/LogicalPlanPojo.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/model/LogicalPlanPojo.scala similarity index 100% rename from workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/model/LogicalPlanPojo.scala rename to common/workflow-operator/src/main/scala/org/apache/texera/amber/compiler/model/LogicalPlanPojo.scala diff --git a/workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/WorkflowCompiler.scala b/workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/WorkflowCompiler.scala index 25166e7ac52..c5e74fc7e19 100644 --- a/workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/WorkflowCompiler.scala +++ b/workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/WorkflowCompiler.scala @@ -28,19 +28,13 @@ import org.apache.texera.amber.compiler.WorkflowCompiler.{ import 
org.apache.texera.amber.compiler.model.{LogicalPlan, LogicalPlanPojo} import org.apache.texera.amber.core.tuple.Schema import org.apache.texera.amber.core.virtualidentity.OperatorIdentity -import org.apache.texera.amber.core.workflow.{ - PhysicalLink, - PhysicalPlan, - PortIdentity, - WorkflowContext -} +import org.apache.texera.amber.core.workflow.{PhysicalPlan, PortIdentity, WorkflowContext} import org.apache.texera.amber.core.workflowruntimestate.FatalErrorType.COMPILATION_ERROR import org.apache.texera.amber.core.workflowruntimestate.WorkflowFatalError import java.time.Instant import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import scala.jdk.CollectionConverters.IteratorHasAsScala object WorkflowCompiler { // util function for extracting the error causes @@ -125,74 +119,6 @@ class WorkflowCompiler( context: WorkflowContext ) extends LazyLogging { - // function to expand logical plan to physical plan - private def expandLogicalPlan( - logicalPlan: LogicalPlan, - errorList: Option[ArrayBuffer[(OperatorIdentity, Throwable)]] - ): PhysicalPlan = { - var physicalPlan = PhysicalPlan(operators = Set.empty, links = Set.empty) - - logicalPlan.getTopologicalOpIds.asScala.foreach { logicalOpId => - val logicalOp = logicalPlan.getOperator(logicalOpId) - val allUpstreamLinks = logicalPlan.getUpstreamLinks(logicalOp.operatorIdentifier) - - try { - val subPlan = logicalOp.getPhysicalPlan(context.workflowId, context.executionId) - - subPlan - .topologicalIterator() - .map(subPlan.getOperator) - .foreach { physicalOp => - val externalLinks = allUpstreamLinks - .filter(link => physicalOp.inputPorts.contains(link.toPortId)) - .flatMap { link => - physicalPlan - .getPhysicalOpsOfLogicalOp(link.fromOpId) - .find(_.outputPorts.contains(link.fromPortId)) - .map(fromOp => - PhysicalLink(fromOp.id, link.fromPortId, physicalOp.id, link.toPortId) - ) - } - - val internalLinks = subPlan.getUpstreamPhysicalLinks(physicalOp.id) - - // Add the operator to the 
physical plan - physicalPlan = physicalPlan.addOperator(physicalOp.propagateSchema()) - - // Add all the links to the physical plan - physicalPlan = (externalLinks ++ internalLinks).foldLeft(physicalPlan) { (plan, link) => - plan.addLink(link) - } - - // **Check for Python-based operator errors during code generation** - if (physicalOp.isPythonBased) { - val code = physicalOp.getCode - val exceptionPattern = """#EXCEPTION DURING CODE GENERATION:\s*(.*)""".r - - exceptionPattern.findFirstMatchIn(code).foreach { matchResult => - val errorMessage = matchResult.group(1).trim - val error = - new RuntimeException(s"Operator is not configured properly: $errorMessage") - - errorList match { - case Some(list) => list.append((logicalOpId, error)) // Store error and continue - case None => throw error // Throw immediately if no error list is provided - } - } - } - } - } catch { - case e: Throwable => - errorList match { - case Some(list) => list.append((logicalOpId, e)) // Store error - case None => throw e // Throw if no list is provided - } - } - } - - physicalPlan - } - /** * Compile a workflow to physical plan, along with the schema propagation result and error(if any) * @@ -211,7 +137,7 @@ class WorkflowCompiler( logicalPlan.resolveScanSourceOpFileName(Some(errorList)) // 3. expand the logical plan to the physical plan - val physicalPlan = expandLogicalPlan(logicalPlan, Some(errorList)) + val physicalPlan = PhysicalPlanExpander.expand(context, logicalPlan, Some(errorList)) // 4. 
collect the output schema for each logical op // even if error is encountered when logical => physical, we still want to get the input schemas for rest no-error operators diff --git a/workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/model/LogicalLink.scala b/workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/model/LogicalLink.scala deleted file mode 100644 index 5c7662f9668..00000000000 --- a/workflow-compiling-service/src/main/scala/org/apache/texera/amber/compiler/model/LogicalLink.scala +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.texera.amber.compiler.model - -import com.fasterxml.jackson.annotation.{JsonCreator, JsonProperty} -import org.apache.texera.amber.core.virtualidentity.OperatorIdentity -import org.apache.texera.amber.core.workflow.PortIdentity - -case class LogicalLink( - @JsonProperty("fromOpId") fromOpId: OperatorIdentity, - fromPortId: PortIdentity, - @JsonProperty("toOpId") toOpId: OperatorIdentity, - toPortId: PortIdentity -) { - @JsonCreator - def this( - @JsonProperty("fromOpId") fromOpId: String, - fromPortId: PortIdentity, - @JsonProperty("toOpId") toOpId: String, - toPortId: PortIdentity - ) = { - this(OperatorIdentity(fromOpId), fromPortId, OperatorIdentity(toOpId), toPortId) - } -} From c3e670b3a6e6243565742532ca4cb39c07fc4467 Mon Sep 17 00:00:00 2001 From: Bob Bai Date: Sun, 31 May 2026 15:51:25 -0700 Subject: [PATCH 4/4] feat: run client-supplied physical plan on the Computing Unit without JWT auth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The client (frontend and agent service) now compiles the workflow against the workflow-compiling-service and ships the ready-to-run PhysicalPlan to the Computing Unit, which runs it directly — no in-process or HTTP compilation, and no JWT authentication, so the CU no longer needs the JWT secret (issue #5011). Frontend: - ExecuteWorkflowService compiles via WorkflowCompilingService on Run and sends WorkflowExecuteRequest{physicalPlan, opsToViewResult}; on a compile failure it surfaces the error and does not start a run. - The workflow websocket URL no longer carries an access token. Computing Unit: - ComputingUnitMaster drops setupJwtAuth and RolesAllowedDynamicFeature, keeping only the SessionUser value-factory binder so @Auth parameters on co-registered dashboard resources stay injectable; it registers PhysicalPlanSerdeModule. - ServletAwareConfigurator's single-node handshake no longer parses a token. 
- SyncExecutionResource (/run) accepts a PhysicalPlan and drops @Auth/@RolesAllowed. - WorkflowExecutionService runs request.physicalPlan; InternalExecutionMetadataResource falls back to the metadata caller's uid when the CU sends none. Cleanup: - Remove the now-unused CompilingServiceClient and WORKFLOW_COMPILING_SERVICE_ENDPOINT env var left from the abandoned CU-side compilation offload. Test: - Add ClientPhysicalPlanRequestSpec covering the physical-plan request round-trip. --- .../architecture/controller/Workflow.scala | 4 +- .../texera/web/ComputingUnitMaster.scala | 17 +- .../texera/web/ServletAwareConfigurator.scala | 34 +--- .../request/WorkflowExecuteRequest.scala | 13 +- .../InternalExecutionMetadataResource.scala | 5 +- .../web/resource/SyncExecutionResource.scala | 90 +++------- .../web/service/CompilingServiceClient.scala | 154 ------------------ .../ExecutionReconfigurationService.scala | 4 +- .../service/WorkflowExecutionService.scala | 59 +++++-- .../ClientPhysicalPlanRequestSpec.scala | 109 +++++++++++++ .../amber/config/EnvironmentalVariable.scala | 3 - .../workflow-compiling.service.ts | 17 ++ .../execute-workflow.service.ts | 38 +++-- .../workflow-websocket.service.ts | 6 +- .../types/workflow-websocket.interface.ts | 6 +- 15 files changed, 255 insertions(+), 304 deletions(-) delete mode 100644 amber/src/main/scala/org/apache/texera/web/service/CompilingServiceClient.scala create mode 100644 amber/src/test/scala/org/apache/texera/web/service/ClientPhysicalPlanRequestSpec.scala diff --git a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/Workflow.scala b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/Workflow.scala index 4c787dcd8bf..2f76e845b64 100644 --- a/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/Workflow.scala +++ b/amber/src/main/scala/org/apache/texera/amber/engine/architecture/controller/Workflow.scala @@ -25,8 +25,8 @@ import 
org.apache.texera.amber.compiler.model.LogicalPlan case class Workflow( context: WorkflowContext, // The logical plan is only retained for in-JVM compilation paths (amber's WorkflowCompiler, - // e2e TestUtils). The runtime offloads compilation to the workflow-compiling-service over HTTP - // and builds a Workflow with `logicalPlan = None`; nothing on the execution path reads it. + // e2e TestUtils). At runtime the client ships a pre-compiled physical plan, so the Computing + // Unit builds a Workflow with `logicalPlan = None`; nothing on the execution path reads it. logicalPlan: Option[LogicalPlan] = None, physicalPlan: PhysicalPlan ) diff --git a/amber/src/main/scala/org/apache/texera/web/ComputingUnitMaster.scala b/amber/src/main/scala/org/apache/texera/web/ComputingUnitMaster.scala index 941585f720c..2f05bbaca47 100644 --- a/amber/src/main/scala/org/apache/texera/web/ComputingUnitMaster.scala +++ b/amber/src/main/scala/org/apache/texera/web/ComputingUnitMaster.scala @@ -40,12 +40,11 @@ import org.apache.texera.amber.engine.common.client.AmberClient import org.apache.texera.amber.engine.common.storage.SequentialRecordStorage import org.apache.texera.amber.engine.common.{AmberRuntime, Utils} import org.apache.texera.amber.util.JSONUtils.objectMapper -import org.apache.texera.amber.util.ObjectMapperUtils +import org.apache.texera.amber.util.{ObjectMapperUtils, PhysicalPlanSerdeModule} import org.apache.commons.jcs3.access.exception.InvalidArgumentException import org.apache.texera.auth.SessionUser import org.apache.texera.dao.SqlServer import org.apache.texera.dao.jooq.generated.tables.pojos.WorkflowExecutions -import org.apache.texera.web.auth.JwtAuth.setupJwtAuth import org.apache.texera.web.resource.dashboard.user.workflow.WorkflowExecutionsResource import org.apache.texera.web.resource.{ SyncExecutionResource, @@ -136,6 +135,9 @@ class ComputingUnitMaster extends io.dropwizard.Application[Configuration] with ) // register scala module to dropwizard default 
object mapper bootstrap.getObjectMapper.registerModule(DefaultScalaModule) + // The execution request carries a pre-compiled PhysicalPlan; register its serializers so the + // CU deserializes it byte-for-byte compatibly with the workflow-compiling-service's output. + PhysicalPlanSerdeModule.register(bootstrap.getObjectMapper) } override def run(configuration: Configuration, environment: Environment): Unit = { @@ -172,14 +174,15 @@ class ComputingUnitMaster extends io.dropwizard.Application[Configuration] with environment.jersey.register(classOf[PveResource]) - setupJwtAuth(environment) - + // The Computing Unit performs no JWT authentication and holds no JWT secret (issue #5011): no + // JwtAuthFilter and no RolesAllowedDynamicFeature are registered, so @RolesAllowed is not + // enforced and the execution endpoints are open — the client ships a pre-compiled physical + // plan. The value-factory binder below is kept ONLY so that @Auth-annotated parameters on + // co-registered dashboard resources stay injectable (resolving to no authenticated user); + // it does not validate tokens. Contrast TexeraWebApplication, which keeps full JWT auth. 
environment.jersey.register( new io.dropwizard.auth.AuthValueFactoryProvider.Binder[SessionUser](classOf[SessionUser]) ) - environment.jersey.register( - classOf[org.glassfish.jersey.server.filter.RolesAllowedDynamicFeature] - ) environment .servlets() .addServletListeners( diff --git a/amber/src/main/scala/org/apache/texera/web/ServletAwareConfigurator.scala b/amber/src/main/scala/org/apache/texera/web/ServletAwareConfigurator.scala index cb3628df5b3..b18acdae5c3 100644 --- a/amber/src/main/scala/org/apache/texera/web/ServletAwareConfigurator.scala +++ b/amber/src/main/scala/org/apache/texera/web/ServletAwareConfigurator.scala @@ -20,14 +20,10 @@ package org.apache.texera.web import com.typesafe.scalalogging.LazyLogging -import org.apache.http.client.utils.URLEncodedUtils -import org.apache.texera.auth.JwtAuth.jwtConsumer import org.apache.texera.auth.util.HeaderField import org.apache.texera.dao.jooq.generated.enums.PrivilegeEnum import org.apache.texera.dao.jooq.generated.tables.pojos.User -import java.net.URI -import java.nio.charset.Charset import javax.websocket.HandshakeResponse import javax.websocket.server.{HandshakeRequest, ServerEndpointConfig} import scala.jdk.CollectionConverters.{ListHasAsScala, _} @@ -82,36 +78,14 @@ class ServletAwareConfigurator extends ServerEndpointConfig.Configurator with La ) logger.debug(s"User created from headers: ID=$userId, Name=$userName") } else { - // SINGLE NODE MODE: Construct the User object from JWT in query parameters. - val params = - URLEncodedUtils.parse(new URI("?" + request.getQueryString), Charset.defaultCharset()) + // SINGLE NODE MODE: the Computing Unit does not authenticate. It grants WRITE access and + // runs whatever physical plan the client sends — no JWT is parsed or validated, so the CU + // holds no JWT secret. The execution owner is resolved downstream from the CU's + // USER_JWT_TOKEN when metadata is persisted. 
config.getUserProperties.put( HeaderField.UserComputingUnitAccess, PrivilegeEnum.WRITE.name() ) - params.asScala - .map(pair => pair.getName -> pair.getValue) - .toMap - .get("access-token") - .map(token => { - val claims = jwtConsumer.process(token).getJwtClaims - config.getUserProperties.put( - classOf[User].getName, - new User( - claims.getClaimValue("userId").asInstanceOf[Long].toInt, - claims.getSubject, - String.valueOf(claims.getClaimValue("email").asInstanceOf[String]), - null, - null, - null, - null, - null, - null, - null, - null - ) - ) - }) } } catch { case e: Exception => diff --git a/amber/src/main/scala/org/apache/texera/web/model/websocket/request/WorkflowExecuteRequest.scala b/amber/src/main/scala/org/apache/texera/web/model/websocket/request/WorkflowExecuteRequest.scala index 084537b1551..8a39d187592 100644 --- a/amber/src/main/scala/org/apache/texera/web/model/websocket/request/WorkflowExecuteRequest.scala +++ b/amber/src/main/scala/org/apache/texera/web/model/websocket/request/WorkflowExecuteRequest.scala @@ -20,8 +20,7 @@ package org.apache.texera.web.model.websocket.request import com.fasterxml.jackson.databind.annotation.JsonDeserialize -import org.apache.texera.amber.compiler.model.LogicalPlanPojo -import org.apache.texera.amber.core.workflow.WorkflowSettings +import org.apache.texera.amber.core.workflow.{PhysicalPlan, WorkflowSettings} case class ReplayExecutionInfo( @JsonDeserialize(contentAs = classOf[java.lang.Long]) @@ -29,10 +28,18 @@ case class ReplayExecutionInfo( interaction: String ) +/** + * Execution request the client sends to the ComputingUnitMaster. The client (frontend / agent + * service) compiles the workflow against the workflow-compiling-service and ships the resulting + * ready-to-run [[PhysicalPlan]] here, so the CU neither compiles nor authenticates — it just runs + * the plan. `opsToViewResult` (logical operator ids) is used to mark which output ports need + * result storage. 
+ */ case class WorkflowExecuteRequest( executionName: String, engineVersion: String, - logicalPlan: LogicalPlanPojo, + physicalPlan: PhysicalPlan, + opsToViewResult: List[String] = List.empty, replayFromExecution: Option[ReplayExecutionInfo], // contains execution Id, interaction Id. workflowSettings: WorkflowSettings, emailNotificationEnabled: Boolean, diff --git a/amber/src/main/scala/org/apache/texera/web/resource/InternalExecutionMetadataResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/InternalExecutionMetadataResource.scala index 06f45a44ec2..8ca93a2c736 100644 --- a/amber/src/main/scala/org/apache/texera/web/resource/InternalExecutionMetadataResource.scala +++ b/amber/src/main/scala/org/apache/texera/web/resource/InternalExecutionMetadataResource.scala @@ -74,9 +74,12 @@ class InternalExecutionMetadataResource { request: CreateExecutionRequest, @Auth user: SessionUser ): CreateExecutionResponse = { + // The execution owner is the request's uid when the caller knows the real user; when the + // caller is a no-auth Computing Unit that sends no uid, fall back to the authenticated user of + // this metadata call (the holder of the CU's USER_JWT_TOKEN). workflow_executions.uid is NOT NULL. 
val eid = ExecutionsMetadataPersistService.insertNewExecution( WorkflowIdentity(request.workflowId), - request.uid, + request.uid.orElse(Option(user.getUid)), request.executionName, request.environmentVersion, request.computingUnitId diff --git a/amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala index 6a5554d4817..89ab7fae45b 100644 --- a/amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala +++ b/amber/src/main/scala/org/apache/texera/web/resource/SyncExecutionResource.scala @@ -21,10 +21,8 @@ package org.apache.texera.web.resource import com.fasterxml.jackson.databind.node.ObjectNode import com.typesafe.scalalogging.LazyLogging -import io.dropwizard.auth.Auth import org.apache.texera.amber.config.ApplicationConfig import org.apache.texera.amber.core.storage.DocumentFactory -import org.apache.texera.amber.operator.LogicalOp import org.apache.texera.amber.core.storage.model.VirtualDocument import org.apache.texera.amber.core.tuple.Tuple import org.apache.texera.amber.core.virtualidentity.{ @@ -32,7 +30,7 @@ import org.apache.texera.amber.core.virtualidentity.{ OperatorIdentity, WorkflowIdentity } -import org.apache.texera.amber.core.workflow.{PortIdentity, WorkflowSettings} +import org.apache.texera.amber.core.workflow.{PhysicalPlan, PortIdentity, WorkflowSettings} import org.apache.texera.amber.engine.architecture.rpc.controlcommands.{ ConsoleMessage, ConsoleMessageType @@ -45,19 +43,15 @@ import org.apache.texera.amber.engine.common.executionruntimestate.{ ExecutionStatsStore } import io.reactivex.rxjava3.core.Observable -import org.apache.texera.auth.SessionUser import org.apache.texera.dao.SqlServer import org.apache.texera.dao.jooq.generated.Tables.OPERATOR_EXECUTIONS -import org.apache.texera.amber.compiler.model.LogicalPlanPojo import org.apache.texera.web.model.websocket.request.WorkflowExecuteRequest -import 
org.apache.texera.amber.compiler.model.LogicalLink import org.apache.texera.web.resource.dashboard.user.workflow.WorkflowExecutionsResource import org.apache.texera.web.service.{ExecutionResultService, WorkflowService} import org.apache.texera.web.storage.ExecutionStateStore.updateWorkflowState import java.net.URI import java.util.concurrent.TimeUnit -import javax.annotation.security.RolesAllowed import javax.ws.rs._ import javax.ws.rs.core.MediaType import scala.collection.mutable @@ -66,7 +60,7 @@ import com.fasterxml.jackson.databind.ObjectMapper case class SyncExecutionRequest( executionName: String, - logicalPlan: LogicalPlanPojo, + physicalPlan: PhysicalPlan, workflowSettings: Option[WorkflowSettings], targetOperatorIds: List[String], timeoutSeconds: Int, @@ -122,14 +116,13 @@ class SyncExecutionResource extends LazyLogging { private val MAX_OPERATOR_RESULT_CHARS = 100000 private val MAX_OPERATOR_RESULT_CELL_CHARS = 20000 + // No @RolesAllowed / @Auth: the client ships a pre-compiled physical plan and the CU just runs it. @POST @Path("/{wid}/{cuid}/run") - @RolesAllowed(Array("REGULAR", "ADMIN")) def executeWorkflowSync( @PathParam("wid") workflowId: Long, @PathParam("cuid") computingUnitId: Int, - request: SyncExecutionRequest, - @Auth user: SessionUser + request: SyncExecutionRequest ): SyncExecutionResult = { val timeoutSeconds = request.timeoutSeconds @@ -152,14 +145,12 @@ class SyncExecutionResource extends LazyLogging { shutdownPreviousExecution(workflowService) - // "Execute To" semantics: when a single target is given, run only its upstream sub-DAG. - val effectiveLogicalPlan = - computeSubDAGIfNeeded(request.logicalPlan, request.targetOperatorIds) - + // The client already compiled (and scoped any "execute to" sub-DAG) into this physical plan. 
val executeRequest = WorkflowExecuteRequest( executionName = request.executionName, engineVersion = "1.0", - logicalPlan = effectiveLogicalPlan, + physicalPlan = request.physicalPlan, + opsToViewResult = request.targetOperatorIds, replayFromExecution = None, workflowSettings = request.workflowSettings .getOrElse( @@ -169,9 +160,11 @@ class SyncExecutionResource extends LazyLogging { computingUnitId = computingUnitId ) + // No authenticated user on the CU; the execution owner is resolved by the dashboard service + // from the CU's USER_JWT_TOKEN when metadata is persisted. workflowService.initExecutionService( executeRequest, - Some(user.getUser), + None, new URI(s"sync-execution://$workflowId") ) @@ -198,10 +191,18 @@ class SyncExecutionResource extends LazyLogging { // Guard against firing during that window by also requiring every declared external // input port to be present in the operator's input metrics — port-1 stats only appear // once probe actually starts consuming, which closes the race. - val targetExpectedExternalInputs: Map[String, Int] = effectiveLogicalPlan.operators - .filter(op => request.targetOperatorIds.contains(op.operatorIdentifier.id)) - .map(op => op.operatorIdentifier.id -> op.operatorInfo.inputPorts.count(!_.id.internal)) - .toMap + // Per target operator, the count of its EXTERNAL (non-internal) input ports — derived from + // the physical ops of that logical operator. A multi-input op (e.g. HashJoin) only truly + // finishes once every external input has reported, which closes the premature-COMPLETED race. 
+ val targetExpectedExternalInputs: Map[String, Int] = request.targetOperatorIds.map { opId => + val logicalOpId = OperatorIdentity(opId) + val externalInputPorts = request.physicalPlan.operators + .filter(_.id.logicalOpId == logicalOpId) + .flatMap(_.inputPorts.keys) + .filterNot(_.internal) + .toSet + opId -> externalInputPorts.size + }.toMap // Require COMPLETED, not just "has output", so upstream operators finish flushing // their data downstream before we tear the execution down. @@ -841,53 +842,6 @@ class SyncExecutionResource extends LazyLogging { } } - private def computeSubDAGIfNeeded( - logicalPlan: LogicalPlanPojo, - targetOperatorIds: List[String] - ): LogicalPlanPojo = { - if (targetOperatorIds.length != 1) { - return logicalPlan - } - - val targetOpId = targetOperatorIds.head - val operatorMap: Map[String, LogicalOp] = - logicalPlan.operators.map(op => op.operatorIdentifier.id -> op).toMap - - if (!operatorMap.contains(targetOpId)) { - logger.warn(s"Target operator $targetOpId not found in logical plan, using full DAG") - return logicalPlan - } - - val incomingLinks: Map[String, List[LogicalLink]] = - logicalPlan.links.groupBy(_.toOpId.id) - - val visited = mutable.Set[String]() - val subDagOperators = mutable.ListBuffer[LogicalOp]() - val subDagLinks = mutable.ListBuffer[LogicalLink]() - - def dfs(currentOpId: String): Unit = { - if (visited.contains(currentOpId)) return - visited.add(currentOpId) - - operatorMap.get(currentOpId).foreach { op => - subDagOperators += op - incomingLinks.getOrElse(currentOpId, List.empty).foreach { link => - subDagLinks += link - dfs(link.fromOpId.id) - } - } - } - - dfs(targetOpId) - - LogicalPlanPojo( - operators = subDagOperators.toList, - links = subDagLinks.toList, - opsToViewResult = targetOperatorIds.filter(id => visited.contains(id)), - opsToReuseResult = logicalPlan.opsToReuseResult.filter(id => visited.contains(id)) - ) - } - @GET @Path("/health") def healthCheck: Map[String, String] = Map("status" -> "ok") 
diff --git a/amber/src/main/scala/org/apache/texera/web/service/CompilingServiceClient.scala b/amber/src/main/scala/org/apache/texera/web/service/CompilingServiceClient.scala deleted file mode 100644 index bb21c543625..00000000000 --- a/amber/src/main/scala/org/apache/texera/web/service/CompilingServiceClient.scala +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.texera.web.service - -import com.fasterxml.jackson.databind.JsonNode -import org.apache.texera.amber.config.EnvironmentalVariable -import org.apache.texera.amber.core.workflow.PhysicalPlan -import org.apache.texera.amber.util.JSONUtils.objectMapper -import org.apache.texera.amber.compiler.model.LogicalPlanPojo - -import java.net.{HttpURLConnection, URL} -import java.nio.charset.StandardCharsets -import scala.jdk.CollectionConverters.IteratorHasAsScala - -/** - * Compiles a workflow (logical plan -> physical plan) by calling the workflow-compiling-service's - * `POST /api/compile` over HTTP, instead of running [[org.apache.texera.workflow.WorkflowCompiler]] - * in-process. 
- * - * This lets the computing-unit master offload compilation to a dedicated service: it serializes the - * [[LogicalPlanPojo]] with [[objectMapper]], posts it with the forwarded `USER_JWT_TOKEN`, and reads - * back the `WorkflowCompilationResponse` union (`success` / `failure`). On `success` it returns the - * deserialized [[PhysicalPlan]] (the same JSONUtils serializers are registered on both ends, so the - * plan round-trips); on `failure` it raises a [[RuntimeException]] carrying the per-operator errors, - * which the caller's existing catch surfaces to the frontend exactly like an in-process failure. - * - * The compiling-service's response classes live in a sibling module that amber does not depend on, - * so the response is parsed from a JSON tree by its `type` discriminator rather than via those - * classes. - */ -object CompilingServiceClient { - - private lazy val userJwtToken: String = - sys.env.getOrElse(EnvironmentalVariable.ENV_USER_JWT_TOKEN, "").trim - - private lazy val endpoint: String = - sys.env - .get(EnvironmentalVariable.ENV_WORKFLOW_COMPILING_SERVICE_ENDPOINT) - .map(_.trim) - .filter(_.nonEmpty) - .getOrElse("http://localhost:9090/api/compile") - - /** - * @throws RuntimeException if compilation fails (message includes the per-operator errors) or the - * service returns a non-2xx response. - */ - def compile( - logicalPlan: LogicalPlanPojo, - workflowId: Long, - executionId: Long - ): PhysicalPlan = compile(endpoint, userJwtToken, logicalPlan, workflowId, executionId) - - /** Endpoint/token are injectable so the HTTP round-trip can be unit-tested in isolation. 
*/ - private[service] def compile( - endpoint: String, - token: String, - logicalPlan: LogicalPlanPojo, - workflowId: Long, - executionId: Long - ): PhysicalPlan = { - val requestUrl = s"$endpoint?workflowId=$workflowId&executionId=$executionId" - val payload = objectMapper.writeValueAsBytes(logicalPlan) - val connection = new URL(requestUrl).openConnection().asInstanceOf[HttpURLConnection] - connection.setRequestMethod("POST") - connection.setRequestProperty("Authorization", s"Bearer $token") - connection.setRequestProperty("Content-Type", "application/json") - connection.setDoOutput(true) - try { - connection.getOutputStream.write(payload) - val code = connection.getResponseCode - if (code < 200 || code >= 300) { - val errorBody = readBody(connection.getErrorStream) - throw new RuntimeException( - s"workflow-compiling-service /compile failed (HTTP $code)" + - (if (errorBody.nonEmpty) s": $errorBody" else "") - ) - } - val response = - objectMapper.readTree(connection.getInputStream.readAllBytes()) - parseResponse(response) - } finally { - connection.disconnect() - } - } - - /** - * Interprets the `WorkflowCompilationResponse` union: returns the [[PhysicalPlan]] on `success`, - * throws with the collected operator errors on `failure`. 
- */ - private def parseResponse(response: JsonNode): PhysicalPlan = { - val responseType = Option(response.get("type")).map(_.asText()).getOrElse("") - responseType match { - case "success" => - val planNode = response.get("physicalPlan") - if (planNode == null || planNode.isNull) { - throw new RuntimeException( - "workflow-compiling-service returned a success response without a physicalPlan" - ) - } - objectMapper.treeToValue(planNode, classOf[PhysicalPlan]) - - case "failure" => - throw new RuntimeException( - s"Workflow compilation failed: ${formatOperatorErrors(response.get("operatorErrors"))}" - ) - - case other => - throw new RuntimeException( - s"workflow-compiling-service returned an unrecognized response type '$other'" - ) - } - } - - /** Flattens `operatorErrors: Map[operatorId, WorkflowFatalError]` into a readable message. */ - private def formatOperatorErrors(errorsNode: JsonNode): String = { - if (errorsNode == null || !errorsNode.isObject || errorsNode.isEmpty) { - "unknown compilation error" - } else { - errorsNode - .fields() - .asScala - .map { entry => - val opId = entry.getKey - val err = entry.getValue - val message = Option(err.get("message")).map(_.asText()).filter(_.nonEmpty) - val details = Option(err.get("details")).map(_.asText()).filter(_.nonEmpty) - val text = (message ++ details).mkString(" - ") - s"$opId: ${if (text.nonEmpty) text else "compilation error"}" - } - .mkString("; ") - } - } - - private def readBody(stream: java.io.InputStream): String = - if (stream == null) "" - else new String(stream.readAllBytes(), StandardCharsets.UTF_8).trim -} diff --git a/amber/src/main/scala/org/apache/texera/web/service/ExecutionReconfigurationService.scala b/amber/src/main/scala/org/apache/texera/web/service/ExecutionReconfigurationService.scala index 2aa0855db86..22143931471 100644 --- a/amber/src/main/scala/org/apache/texera/web/service/ExecutionReconfigurationService.scala +++ 
b/amber/src/main/scala/org/apache/texera/web/service/ExecutionReconfigurationService.scala @@ -59,8 +59,8 @@ class ExecutionReconfigurationService( val newOp = modifyLogicRequest.operator val opId = newOp.operatorIdentifier // Reconfiguration derives the new physical op from the original logical op. The logical plan is - // only present when the workflow was compiled in-process; when compilation was offloaded to the - // compiling-service the runtime holds only the physical plan, so reconfiguration is unavailable. + // only present when the workflow was compiled in-process; when the client ships a pre-compiled + // physical plan the runtime holds only that, so reconfiguration is unavailable. workflow.logicalPlan match { case None => ModifyLogicResponse( diff --git a/amber/src/main/scala/org/apache/texera/web/service/WorkflowExecutionService.scala b/amber/src/main/scala/org/apache/texera/web/service/WorkflowExecutionService.scala index fc3eabc7b98..77181a88426 100644 --- a/amber/src/main/scala/org/apache/texera/web/service/WorkflowExecutionService.scala +++ b/amber/src/main/scala/org/apache/texera/web/service/WorkflowExecutionService.scala @@ -20,8 +20,12 @@ package org.apache.texera.web.service import com.typesafe.scalalogging.LazyLogging -import org.apache.texera.amber.core.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import org.apache.texera.amber.core.workflow.WorkflowContext +import org.apache.texera.amber.core.virtualidentity.{ + ExecutionIdentity, + OperatorIdentity, + WorkflowIdentity +} +import org.apache.texera.amber.core.workflow.{GlobalPortIdentity, WorkflowContext} import org.apache.texera.amber.engine.architecture.controller.{ControllerConfig, Workflow} import org.apache.texera.amber.engine.architecture.rpc.controlcommands.EmptyRequest import org.apache.texera.amber.engine.architecture.rpc.controlreturns.WorkflowAggregatedState._ @@ -51,6 +55,27 @@ object WorkflowExecutionService { .getLatestExecutionID(workflowId.id.toInt, computingUnitId) 
.map(eid => new ExecutionIdentity(eid.longValue())) } + + /** + * The non-internal output ports (across all physical operators) of the given to-view logical + * operators. Since the client ships a ready-to-run physical plan, the CU re-derives which ports + * need result storage from that plan plus the requested view operators. Terminal sink ports are + * materialized by the schedule generator regardless of this set. + */ + def outputPortsForViewResult( + physicalPlan: org.apache.texera.amber.core.workflow.PhysicalPlan, + opsToViewResult: Seq[String] + ): Set[GlobalPortIdentity] = { + val viewOps = opsToViewResult.map(OperatorIdentity(_)).toSet + physicalPlan.operators + .filter(physicalOp => viewOps.contains(physicalOp.id.logicalOpId)) + .flatMap { physicalOp => + physicalOp.outputPorts.keys + .filterNot(_.internal) + .map(portId => GlobalPortIdentity(opId = physicalOp.id, portId = portId)) + } + .toSet + } } class WorkflowExecutionService( @@ -103,24 +128,22 @@ class WorkflowExecutionService( var executionConsoleService: ExecutionConsoleService = _ def executeWorkflow(): Unit = { - // Offload compilation to the workflow-compiling-service over HTTP and run the returned plan. - // The runtime does not need the logical plan, so it is left as None on the Workflow. - val physicalPlan = - try { - CompilingServiceClient.compile( - request.logicalPlan, - workflowContext.workflowId.id, - workflowContext.executionId.id - ) - } catch { - case err: Throwable => - // Compilation failed (e.g. an invalid workflow). Surface the error and stop here — - // continuing would dereference a null `workflow` and mask the real failure with an NPE. - errorHandler(err) - return - } + // The client (frontend / agent service) compiles the workflow against the + // workflow-compiling-service and sends the ready-to-run physical plan; the CU just runs it — + // no compilation, no authentication. The runtime does not need the logical plan (None). 
+ val physicalPlan = request.physicalPlan workflow = Workflow(workflowContext, None, physicalPlan) + // Result-storage planning is an execution-time concern that does not travel in the physical + // plan: mark the output ports of the to-view operators as needing storage. (Terminal sink + // ports are materialized by the schedule generator regardless of this set.) + val viewOutputPorts = + WorkflowExecutionService.outputPortsForViewResult(physicalPlan, request.opsToViewResult) + workflowContext.workflowSettings = workflowContext.workflowSettings.copy( + outputPortsNeedingStorage = + workflowContext.workflowSettings.outputPortsNeedingStorage ++ viewOutputPorts + ) + client = ComputingUnitMaster.createAmberRuntime( workflow.context, workflow.physicalPlan, diff --git a/amber/src/test/scala/org/apache/texera/web/service/ClientPhysicalPlanRequestSpec.scala b/amber/src/test/scala/org/apache/texera/web/service/ClientPhysicalPlanRequestSpec.scala new file mode 100644 index 00000000000..72bcc4fc9d3 --- /dev/null +++ b/amber/src/test/scala/org/apache/texera/web/service/ClientPhysicalPlanRequestSpec.scala @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.web.service + +import org.apache.texera.amber.compiler.model.{LogicalLink, LogicalPlanPojo} +import org.apache.texera.amber.core.virtualidentity.OperatorIdentity +import org.apache.texera.amber.core.workflow.{PhysicalPlan, PortIdentity, WorkflowContext, WorkflowSettings} +import org.apache.texera.amber.operator.TestOperators +import org.apache.texera.amber.operator.aggregate.AggregationFunction +import org.apache.texera.amber.util.JSONUtils.objectMapper +import org.apache.texera.web.model.websocket.request.{TexeraWebSocketRequest, WorkflowExecuteRequest} +import org.apache.texera.workflow.WorkflowCompiler +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +/** + * The new architecture has the client compile the workflow and ship a ready-to-run + * [[PhysicalPlan]] to the ComputingUnitMaster inside a [[WorkflowExecuteRequest]]. These tests pin + * the two things that make this possible: the request (with its PhysicalPlan) survives the exact + * polymorphic JSON round-trip the CU's websocket parser performs, and the CU re-derives the + * result-storage ports from the plan + the to-view operators. + */ +class ClientPhysicalPlanRequestSpec extends AnyFlatSpec with Matchers { + + /** Compile CSV scan -> group-by aggregate into a physical plan; return it and the aggregate id. 
*/ + private def compiledPlanAndViewOp(): (PhysicalPlan, String) = { + val csv = TestOperators.smallCsvScanOpDesc() + val agg = + TestOperators.aggregateAndGroupByDesc("Units Sold", AggregationFunction.SUM, List("Country")) + val plan = new WorkflowCompiler(new WorkflowContext()) + .compile( + LogicalPlanPojo( + List(csv, agg), + List( + LogicalLink(csv.operatorIdentifier, PortIdentity(), agg.operatorIdentifier, PortIdentity()) + ), + List.empty, + List.empty + ) + ) + .physicalPlan + (plan, agg.operatorIdentifier.id) + } + + private def buildRequest(plan: PhysicalPlan, viewOps: List[String]): WorkflowExecuteRequest = + WorkflowExecuteRequest( + executionName = "test", + engineVersion = "1.0", + physicalPlan = plan, + opsToViewResult = viewOps, + replayFromExecution = None, + workflowSettings = WorkflowSettings(dataTransferBatchSize = 400), + emailNotificationEnabled = false, + computingUnitId = 0 + ) + + "A WorkflowExecuteRequest carrying a PhysicalPlan" should + "survive the websocket polymorphic JSON round-trip with the plan intact" in { + val (plan, aggId) = compiledPlanAndViewOp() + val request: TexeraWebSocketRequest = buildRequest(plan, List(aggId)) + + // Mirror WorkflowWebsocketResource: serialize via the polymorphic base ("type" discriminator), + // then read it back as the base and dispatch on the concrete request type. + val json = objectMapper.writeValueAsString(request) + json should include(""""type":"WorkflowExecuteRequest"""") + val back = objectMapper + .readValue(json, classOf[TexeraWebSocketRequest]) + .asInstanceOf[WorkflowExecuteRequest] + + back.opsToViewResult shouldBe List(aggId) + back.physicalPlan.operators.map(_.id) shouldBe plan.operators.map(_.id) + back.physicalPlan.links shouldBe plan.links + // The runtime-critical executor descriptor of every operator survives. 
+ plan.operators.foreach { op => + back.physicalPlan.getOperator(op.id).opExecInitInfo shouldBe op.opExecInitInfo + } + } + + "outputPortsForViewResult" should "select exactly the to-view operators' non-internal output ports" in { + val (plan, aggId) = compiledPlanAndViewOp() + + val ports = WorkflowExecutionService.outputPortsForViewResult(plan, List(aggId)) + ports should not be empty + ports.foreach(_.opId.logicalOpId shouldBe OperatorIdentity(aggId)) + ports.foreach(_.portId.internal shouldBe false) + + // No to-view operators -> no storage ports requested (terminal sinks are handled by the scheduler). + WorkflowExecutionService.outputPortsForViewResult(plan, List.empty) shouldBe empty + // An unknown operator id contributes nothing. + WorkflowExecutionService.outputPortsForViewResult(plan, List("does-not-exist")) shouldBe empty + } +} diff --git a/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala b/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala index c6b8090a2de..95010f2a437 100644 --- a/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala +++ b/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala @@ -47,9 +47,6 @@ object EnvironmentalVariable { val ENV_DASHBOARD_SERVICE_EXECUTION_METADATA_ENDPOINT = "DASHBOARD_SERVICE_EXECUTION_METADATA_ENDPOINT" - // Endpoint of the workflow-compiling-service's /compile, so a computing unit can compile a - // workflow (logical plan -> physical plan) over HTTP instead of running the compiler in-process. - val ENV_WORKFLOW_COMPILING_SERVICE_ENDPOINT = "WORKFLOW_COMPILING_SERVICE_ENDPOINT" // When "true", the computing unit routes execution-metadata operations over HTTP instead of JDBC. 
val ENV_EXECUTION_METADATA_REMOTE = "EXECUTION_METADATA_REMOTE" diff --git a/frontend/src/app/workspace/service/compile-workflow/workflow-compiling.service.ts b/frontend/src/app/workspace/service/compile-workflow/workflow-compiling.service.ts index 9648d2b305f..84a5e9392e7 100644 --- a/frontend/src/app/workspace/service/compile-workflow/workflow-compiling.service.ts +++ b/frontend/src/app/workspace/service/compile-workflow/workflow-compiling.service.ts @@ -37,6 +37,7 @@ import { } from "../../types/workflow-compiling.interface"; import { WorkflowFatalError } from "../../types/workflow-websocket.interface"; import { LogicalPlan } from "../../types/execute-workflow.interface"; +import { PhysicalPlan } from "../../../common/type/physical-plan"; import { ValidationWorkflowService } from "../validation/validation-workflow.service"; import { WorkflowGraphReadonly } from "../workflow-graph/model/workflow-graph"; import { serializePortIdentity } from "../../../common/util/port-identity-serde"; @@ -123,6 +124,22 @@ export class WorkflowCompilingService { return this.currentCompilationStateInfo.state; } + /** The physical plan from the most recent successful compilation, if any. */ + public getPhysicalPlan(): PhysicalPlan | undefined { + return this.currentCompilationStateInfo.state === CompilationState.Succeeded + ? this.currentCompilationStateInfo.physicalPlan + : undefined; + } + + /** + * Compile the given logical plan and emit the response (physical plan + schemas, or errors). + * Used on Run so the client ships a freshly-compiled physical plan to the Computing Unit, which + * then runs it directly without compiling. 
+ */ + public compileWorkflow(logicalPlan: LogicalPlan): Observable { + return this.compile(logicalPlan); + } + public getWorkflowCompilationErrors(): Readonly> { if ( this.currentCompilationStateInfo.state === CompilationState.Succeeded || diff --git a/frontend/src/app/workspace/service/execute-workflow/execute-workflow.service.ts b/frontend/src/app/workspace/service/execute-workflow/execute-workflow.service.ts index d3d7d23d179..1e402142f74 100644 --- a/frontend/src/app/workspace/service/execute-workflow/execute-workflow.service.ts +++ b/frontend/src/app/workspace/service/execute-workflow/execute-workflow.service.ts @@ -29,6 +29,7 @@ import { LogicalPlan, } from "../../types/execute-workflow.interface"; import { WorkflowWebsocketService } from "../workflow-websocket/workflow-websocket.service"; +import { WorkflowCompilingService } from "../compile-workflow/workflow-compiling.service"; import { OperatorCurrentTuples, RegionStateEvent, @@ -100,7 +101,8 @@ export class ExecuteWorkflowService { private workflowStatusService: WorkflowStatusService, private notificationService: NotificationService, @Inject(DOCUMENT) private document: Document, - private computingUnitStatusService: ComputingUnitStatusService + private computingUnitStatusService: ComputingUnitStatusService, + private workflowCompilingService: WorkflowCompilingService ) { workflowWebsocketService.websocketEvent().subscribe(event => { switch (event.type) { @@ -246,18 +248,30 @@ export class ExecuteWorkflowService { console.warn("No computing unit selected for workflow execution"); } - const workflowExecuteRequest = { - executionName: executionName, - engineVersion: version.hash, - logicalPlan: logicalPlan, - replayFromExecution: replayExecutionInfo, - workflowSettings: workflowSettings, - emailNotificationEnabled: emailNotificationEnabled, - computingUnitId: computingUnitId, // Include the computing unit ID - }; - // wait for the form debounce to complete, then send + // The Computing Unit runs a 
pre-compiled physical plan and never compiles or authenticates. + // Compile the workflow here (via the workflow-compiling-service) and ship the resulting physical + // plan; on a compile failure, surface it and do not start a run. Wait for the form debounce so + // the latest property edits are reflected before compiling. window.setTimeout(() => { - this.workflowWebsocketService.send("WorkflowExecuteRequest", workflowExecuteRequest); + this.workflowCompilingService.compileWorkflow(logicalPlan).subscribe(response => { + if (!response.physicalPlan) { + this.notificationService.error( + "Workflow compilation failed — resolve the operator errors and run again." + ); + return; + } + const workflowExecuteRequest = { + executionName: executionName, + engineVersion: version.hash, + physicalPlan: response.physicalPlan, + opsToViewResult: logicalPlan.opsToViewResult ?? [], + replayFromExecution: replayExecutionInfo, + workflowSettings: workflowSettings, + emailNotificationEnabled: emailNotificationEnabled, + computingUnitId: computingUnitId, // Include the computing unit ID + }; + this.workflowWebsocketService.send("WorkflowExecuteRequest", workflowExecuteRequest); + }); }, FORM_DEBOUNCE_TIME_MS); // add flag for new execution of workflow diff --git a/frontend/src/app/workspace/service/workflow-websocket/workflow-websocket.service.ts b/frontend/src/app/workspace/service/workflow-websocket/workflow-websocket.service.ts index 017e43ee6a4..3825387a6f3 100644 --- a/frontend/src/app/workspace/service/workflow-websocket/workflow-websocket.service.ts +++ b/frontend/src/app/workspace/service/workflow-websocket/workflow-websocket.service.ts @@ -29,7 +29,6 @@ import { TexeraWebsocketRequestTypes, } from "../../types/workflow-websocket.interface"; import { delayWhen, filter, map, retryWhen, tap } from "rxjs/operators"; -import { AuthService } from "../../../common/service/user/auth.service"; import { getWebsocketUrl } from "src/app/common/util/url"; import { isDefined } from 
"../../../common/util/predicate"; import { GuiConfigService } from "../../../common/service/gui-config.service"; @@ -103,14 +102,15 @@ export class WorkflowWebsocketService { console.log(`uId is ${uId}, defaulting to uId = 1`); uId = 1; } + // No access-token: the Computing Unit does not authenticate the websocket — it runs the + // pre-compiled physical plan the client sends. const websocketUrl = getWebsocketUrl(WorkflowWebsocketService.TEXERA_WEBSOCKET_ENDPOINT, "") + "?wid=" + wId + "&uid=" + uId + - (isDefined(cuId) ? `&cuid=${cuId}` : "") + - (AuthService.getAccessToken() !== null ? "&access-token=" + AuthService.getAccessToken() : ""); + (isDefined(cuId) ? `&cuid=${cuId}` : ""); console.log("websocketUrl", websocketUrl); this.websocket = webSocket(websocketUrl); // setup reconnection logic diff --git a/frontend/src/app/workspace/types/workflow-websocket.interface.ts b/frontend/src/app/workspace/types/workflow-websocket.interface.ts index afd5ea6f04a..3b7758fc25c 100644 --- a/frontend/src/app/workspace/types/workflow-websocket.interface.ts +++ b/frontend/src/app/workspace/types/workflow-websocket.interface.ts @@ -28,6 +28,7 @@ import { import { IndexableObject } from "./result-table.interface"; import { ConsoleUpdateEvent } from "./workflow-common.interface"; import { SchemaAttribute } from "./workflow-compiling.interface"; +import { PhysicalPlan } from "../../common/type/physical-plan"; /** * @fileOverview Type Definitions of WebSocket (Ws) API @@ -46,7 +47,10 @@ export interface WorkflowExecuteRequest extends Readonly<{ executionName: string; engineVersion: string; - logicalPlan: LogicalPlan; + // The client compiles the workflow and sends the ready-to-run physical plan; the Computing + // Unit runs it directly without compiling. opsToViewResult marks which operators' results to store. + physicalPlan: PhysicalPlan; + opsToViewResult: ReadonlyArray; }> {} export interface ReplayExecutionInfo