diff --git a/amber/src/main/scala/org/apache/texera/web/service/ExecutionResultService.scala b/amber/src/main/scala/org/apache/texera/web/service/ExecutionResultService.scala index b335ed0c3c7..20446bb998a 100644 --- a/amber/src/main/scala/org/apache/texera/web/service/ExecutionResultService.scala +++ b/amber/src/main/scala/org/apache/texera/web/service/ExecutionResultService.scala @@ -62,6 +62,7 @@ import java.lang.Byte.{SIZE => BitsPerByte} import java.util.UUID import scala.collection.mutable import scala.concurrent.duration.DurationInt +import scala.language.existentials object ExecutionResultService { diff --git a/build.sbt b/build.sbt index 8268fc7a117..3767e7a6c21 100644 --- a/build.sbt +++ b/build.sbt @@ -24,6 +24,11 @@ import com.typesafe.sbt.packager.universal.UniversalPlugin.autoImport.Universal ThisBuild / Test / javaOptions ++= JdkOptions.jvmFlags((ThisBuild / baseDirectory).value) +// Fail Java compilation on deprecation warnings so PRs can't reintroduce +// deprecated-API patterns (e.g. scala.collection.JavaConverters in Java +// callers — the modern Java entry point is scala.jdk.javaapi.CollectionConverters). +// -Xlint:deprecation surfaces the per-call-site location, -Werror turns it fatal. +ThisBuild / Compile / javacOptions ++= Seq("-Xlint:deprecation", "-Werror") // Emit one JUnit-XML file per spec under each module's target/test-reports/. // Codecov Test Analytics ingests these via `report_type: test_results` to // surface failing-test stack traces in PR comments and flag tests that have diff --git a/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/BoundaryValidator.scala b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/BoundaryValidator.scala index 59713f0d6dc..70892afc584 100644 --- a/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/BoundaryValidator.scala +++ b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/BoundaryValidator.scala @@ -21,6 +21,54 @@ package org.apache.texera.amber.pybuilder import scala.reflect.macros.blackbox +object BoundaryValidator { + + // These are internal data carriers for the macro pipeline: + // - constructed by PythonTemplateBuilder's macro, + // - passed straight into validator methods that read fields, + // - never pattern-matched, never copied, never compared for equality. + // Plain classes (with companion `apply` factories) keep the same call-site + // syntax (`BoundaryValidator.CompileTimeContext(...)`) without dragging in + // the auto-generated case-class equals/hashCode/copy/Product/unapply + // bytecode that runs only at compile time and so can never be covered by + // runtime tests. + final class CompileTimeContext[Pos]( + val leftPart: String, + val rightPart: String, + val prefixSource: String, + val argIndex: Int, + val errorPos: Pos + ) + + object CompileTimeContext { + def apply[Pos]( + leftPart: String, + rightPart: String, + prefixSource: String, + argIndex: Int, + errorPos: Pos + ): CompileTimeContext[Pos] = + new CompileTimeContext[Pos](leftPart, rightPart, prefixSource, argIndex, errorPos) + } + + final class RuntimeContext( + val leftPart: String, + val rightPart: String, + val prefixSource: String, + val argIndex: Int + ) + + object RuntimeContext { + def apply( + leftPart: String, + rightPart: String, + prefixSource: String, + argIndex: Int + ): RuntimeContext = + new RuntimeContext(leftPart, rightPart, prefixSource, argIndex) + } +} + /** * Macro-only helper: validates boundaries for Encodable insertions. * @@ -30,6 +78,7 @@ import scala.reflect.macros.blackbox final class BoundaryValidator[C <: blackbox.Context](val c: C) { import PythonLexerUtils._ import c.universe._ + import BoundaryValidator.{CompileTimeContext, RuntimeContext} /** * Centralized, templatized error messages (Option A). @@ -75,22 +124,7 @@ final class BoundaryValidator[C <: blackbox.Context](val c: C) { "Add whitespace or punctuation to separate tokens." } - final case class CompileTimeContext( - leftPart: String, - rightPart: String, - prefixSource: String, - argIndex: Int, - errorPos: Position - ) - - final case class RuntimeContext( - leftPart: String, - rightPart: String, - prefixSource: String, - argIndex: Int - ) - - def validateCompileTime(ctx: CompileTimeContext): Unit = { + def validateCompileTime(ctx: CompileTimeContext[Position]): Unit = { val prefixLine = lineTail(ctx.prefixSource) val argNum = ctx.argIndex + 1 diff --git a/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonTemplateBuilder.scala b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonTemplateBuilder.scala index a79f162de1f..73f4a3846dd 100644 --- a/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonTemplateBuilder.scala +++ b/common/pybuilder/src/main/scala/org/apache/texera/amber/pybuilder/PythonTemplateBuilder.scala @@ -363,7 +363,13 @@ object PythonTemplateBuilder { if (argExpr.tree.pos != NoPosition) argExpr.tree.pos else macroCtx.enclosingPosition validator.validateCompileTime( - validator.CompileTimeContext(leftPart, rightPart, prefixSource, argIndex, errorPos) + BoundaryValidator.CompileTimeContext( + leftPart, + rightPart, + prefixSource, + argIndex, + errorPos + ) ) case _ => // no-op @@ -414,7 +420,7 @@ object PythonTemplateBuilder { val argIdent = Ident(TermName(s"__pyb_arg$argIndex")) validator.runtimeChecksForNestedBuilder( - validator.RuntimeContext(leftPart, rightPart, prefixSource, argIndex), + BoundaryValidator.RuntimeContext(leftPart, rightPart, prefixSource, argIndex), argIdent ) diff --git a/common/pybuilder/src/test/scala/org/apache/texera/amber/pybuilder/BoundaryValidatorSpec.scala b/common/pybuilder/src/test/scala/org/apache/texera/amber/pybuilder/BoundaryValidatorSpec.scala new file mode 100644 index 00000000000..d009b52fdec --- /dev/null +++ b/common/pybuilder/src/test/scala/org/apache/texera/amber/pybuilder/BoundaryValidatorSpec.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.pybuilder + +import org.apache.texera.amber.pybuilder.BoundaryValidator.{CompileTimeContext, RuntimeContext} +import org.scalatest.funsuite.AnyFunSuite + +/** + * Characterization tests for the data carriers on `BoundaryValidator`'s + * companion. In production the macro is the only place that constructs + * these, so Jacoco never sees them at runtime; this spec pins the + * apply/accessor contract that the rest of the macro pipeline depends on. + */ +class BoundaryValidatorSpec extends AnyFunSuite { + + test("BoundaryValidator companion object is loadable") { + // Force a direct reference to the outer companion (not just the nested + // CompileTimeContext / RuntimeContext) so its static initializer is + // exercised by Jacoco. + assert(BoundaryValidator.getClass.getName.endsWith("BoundaryValidator$")) + } + + test("RuntimeContext apply binds every constructor argument to a val") { + val ctx = RuntimeContext( + leftPart = "left", + rightPart = "right", + prefixSource = "prefix", + argIndex = 0 + ) + + assert(ctx.leftPart == "left") + assert(ctx.rightPart == "right") + assert(ctx.prefixSource == "prefix") + assert(ctx.argIndex == 0) + } + + // Use a plain String for the `Pos` type parameter so the spec doesn't have + // to pull in a macro `Context`. The class is generic precisely so tests + // like this can construct it without a Universe. + test("CompileTimeContext apply binds every constructor argument including the generic errorPos") { + val ctx = CompileTimeContext[String]( + leftPart = "left", + rightPart = "right", + prefixSource = "prefix", + argIndex = 3, + errorPos = "Foo.scala:42" + ) + + assert(ctx.leftPart == "left") + assert(ctx.rightPart == "right") + assert(ctx.prefixSource == "prefix") + assert(ctx.argIndex == 3) + assert(ctx.errorPos == "Foo.scala:42") + } +} diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/TestOperators.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/TestOperators.scala index 268b06ff8be..b3c38735957 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/TestOperators.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/TestOperators.scala @@ -31,7 +31,6 @@ import org.apache.texera.amber.operator.keywordSearch.KeywordSearchOpDesc import org.apache.texera.amber.operator.source.scan.csv.CSVScanSourceOpDesc import org.apache.texera.amber.operator.source.scan.json.JSONLScanSourceOpDesc import org.apache.texera.amber.operator.source.sql.asterixdb.AsterixDBSourceOpDesc -import org.apache.texera.amber.operator.source.sql.mysql.MySQLSourceOpDesc import org.apache.texera.amber.operator.udf.python.PythonUDFOpDescV2 import org.apache.texera.amber.operator.udf.python.source.PythonUDFSourceOpDescV2 @@ -140,25 +139,6 @@ object TestOperators { aggOp } - def inMemoryMySQLSourceOpDesc( - host: String, - port: String, - database: String, - table: String, - username: String, - password: String - ): MySQLSourceOpDesc = { - val inMemoryMySQLSourceOpDesc = new MySQLSourceOpDesc() - inMemoryMySQLSourceOpDesc.host = host - inMemoryMySQLSourceOpDesc.port = port - inMemoryMySQLSourceOpDesc.database = database - inMemoryMySQLSourceOpDesc.table = table - inMemoryMySQLSourceOpDesc.username = username - inMemoryMySQLSourceOpDesc.password = password - inMemoryMySQLSourceOpDesc.limit = Some(1000) - inMemoryMySQLSourceOpDesc - } - // TODO: use mock data to perform the test, remove dependency on the real AsterixDB def asterixDBSourceOpDesc(): AsterixDBSourceOpDesc = { val asterixDBOp = new AsterixDBSourceOpDesc() diff --git a/file-service/src/main/scala/org/apache/texera/service/type/serde/DatasetFileNodeSerializer.java b/file-service/src/main/scala/org/apache/texera/service/type/serde/DatasetFileNodeSerializer.java index d0731fdec71..70b24270eaa 100644 --- a/file-service/src/main/scala/org/apache/texera/service/type/serde/DatasetFileNodeSerializer.java +++ b/file-service/src/main/scala/org/apache/texera/service/type/serde/DatasetFileNodeSerializer.java @@ -23,8 +23,8 @@ import com.fasterxml.jackson.databind.SerializerProvider; import com.fasterxml.jackson.databind.ser.std.StdSerializer; import org.apache.texera.service.type.DatasetFileNode; -import scala.collection.JavaConverters; import scala.collection.immutable.List; +import scala.jdk.javaapi.CollectionConverters; import java.io.IOException; @@ -53,7 +53,7 @@ public void serialize(DatasetFileNode value, JsonGenerator gen, SerializerProvid gen.writeFieldName("children"); gen.writeStartArray(); List children = value.getChildren(); - for (DatasetFileNode child : JavaConverters.seqAsJavaList(children)) { + for (DatasetFileNode child : CollectionConverters.asJava(children)) { serialize(child, gen, provider); // Recursively serialize children } gen.writeEndArray(); diff --git a/file-service/src/test/scala/org/apache/texera/service/type/serde/DatasetFileNodeSerializerSpec.scala b/file-service/src/test/scala/org/apache/texera/service/type/serde/DatasetFileNodeSerializerSpec.scala new file mode 100644 index 00000000000..4a74ad01432 --- /dev/null +++ b/file-service/src/test/scala/org/apache/texera/service/type/serde/DatasetFileNodeSerializerSpec.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.service.`type`.serde + +import com.fasterxml.jackson.databind.module.SimpleModule +import com.fasterxml.jackson.databind.{JsonNode, ObjectMapper} +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import org.apache.texera.service.`type`.DatasetFileNode +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +class DatasetFileNodeSerializerSpec extends AnyFlatSpec with Matchers { + + private val mapper: ObjectMapper = { + val m = new ObjectMapper() + // DefaultScalaModule lets Jackson unwrap scala.Option for the "size" field. + m.registerModule(DefaultScalaModule) + val module = new SimpleModule() + module.addSerializer(classOf[DatasetFileNode], new DatasetFileNodeSerializer()) + m.registerModule(module) + m + } + + private def asJson(node: DatasetFileNode): JsonNode = + mapper.readTree(mapper.writeValueAsString(node)) + + // The serializer dereferences value.getParent().getFilePath(), so every node it + // sees needs a non-null parent. Tests build a tree rooted at "/" and serialize + // its descendants. + private def rootDir: DatasetFileNode = + new DatasetFileNode("/", "directory", null, "") + + "DatasetFileNodeSerializer" should "serialize a file node with size and no children field" in { + val root = rootDir + val owner = new DatasetFileNode("alice@example.com", "directory", root, "alice@example.com") + val file = new DatasetFileNode("data.csv", "file", owner, "alice@example.com", Some(100L)) + + val json = asJson(file) + + json.get("name").asText() shouldBe "data.csv" + json.get("type").asText() shouldBe "file" + json.get("parentDir").asText() shouldBe "/alice@example.com" + json.get("ownerEmail").asText() shouldBe "alice@example.com" + json.get("size").asLong() shouldBe 100L + json.has("children") shouldBe false + } + + it should "recursively serialize a directory and its children" in { + val root = rootDir + val owner = new DatasetFileNode("alice@example.com", "directory", root, "alice@example.com") + val file = new DatasetFileNode("data.csv", "file", owner, "alice@example.com", Some(100L)) + val subdir = new DatasetFileNode("subdir", "directory", owner, "alice@example.com") + val nested = new DatasetFileNode("nested.txt", "file", subdir, "alice@example.com", Some(200L)) + subdir.children = Some(List(nested)) + owner.children = Some(List(file, subdir)) + + val json = asJson(owner) + + json.get("name").asText() shouldBe "alice@example.com" + json.get("type").asText() shouldBe "directory" + json.get("parentDir").asText() shouldBe "/" + val children = json.get("children") + children.isArray shouldBe true + children.size() shouldBe 2 + children.get(0).get("name").asText() shouldBe "data.csv" + children.get(0).get("size").asLong() shouldBe 100L + children.get(1).get("name").asText() shouldBe "subdir" + children.get(1).get("children").get(0).get("name").asText() shouldBe "nested.txt" + children.get(1).get("children").get(0).get("size").asLong() shouldBe 200L + } + + it should "emit an empty children array for a directory with no children" in { + val root = rootDir + val empty = new DatasetFileNode("empty", "directory", root, "alice@example.com") + + val json = asJson(empty) + + json.get("type").asText() shouldBe "directory" + val children = json.get("children") + children.isArray shouldBe true + children.size() shouldBe 0 + } +}