From f86a598500285c58b7b7945bd34400f1a4b02ecd Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Sun, 3 May 2026 23:58:02 -0700 Subject: [PATCH 1/2] test(workflow-core): add unit test coverage for TupleUtils Pin the contract of `TupleUtils`: - `tuple2json` emits one JSON field per schema attribute in the schema's declared order, indexes `fieldVals` via `schema.getIndex` (so a reordered schema picks different slots), emits JSON null for null values, and yields an empty object for an empty schema. - `json2tuple` infers a schema from a flat JSON object, round-trips faithfully alongside `tuple2json`, drops non-object roots into an empty tuple (rather than silently succeeding with a populated tuple), and throws on malformed JSON. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../amber/core/tuple/TupleUtilsSpec.scala | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala diff --git a/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala b/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala new file mode 100644 index 00000000000..13218276ec5 --- /dev/null +++ b/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.texera.amber.core.tuple + +import org.scalatest.flatspec.AnyFlatSpec + +class TupleUtilsSpec extends AnyFlatSpec { + + // --- tuple2json ------------------------------------------------------------ + + "TupleUtils.tuple2json" should "emit one JSON field per schema attribute, in the schema's declared order" in { + val schema = new Schema( + new Attribute("id", AttributeType.INTEGER), + new Attribute("name", AttributeType.STRING) + ) + val node = TupleUtils.tuple2json(schema, Array[Any](Int.box(7), "alice")) + // Field iteration order on Jackson ObjectNode follows insertion order, + // which mirrors the schema's getAttributeNames order. + val keys = new java.util.ArrayList[String]() + node.fieldNames().forEachRemaining(k => keys.add(k)) + assert(scala.jdk.CollectionConverters.ListHasAsScala(keys).asScala.toList == List("id", "name")) + assert(node.get("id").asInt() == 7) + assert(node.get("name").asText() == "alice") + } + + it should "emit JSON null for null field values" in { + val schema = new Schema(new Attribute("v", AttributeType.STRING)) + val node = TupleUtils.tuple2json(schema, Array[Any](null)) + assert(node.get("v").isNull) + } + + it should "respect schema.getIndex when fieldVals is laid out positionally" in { + // The schema declares "b" before "a", so "b" must read fieldVals(0) and + // "a" must read fieldVals(1); tuple2json is expected to index fieldVals + // via schema.getIndex(attrName), i.e. by declared position, not by name. 
+ val schema = new Schema( + new Attribute("b", AttributeType.STRING), + new Attribute("a", AttributeType.STRING) + ) + val node = TupleUtils.tuple2json(schema, Array[Any]("first", "second")) + assert(node.get("b").asText() == "first") + assert(node.get("a").asText() == "second") + } + + it should "produce an empty object for an empty schema" in { + val node = TupleUtils.tuple2json(new Schema(), Array.empty[Any]) + assert(node.size() == 0) + } + + // --- json2tuple ------------------------------------------------------------ + + "TupleUtils.json2tuple" should "infer a schema from a flat JSON object's keys and types" in { + val tuple = TupleUtils.json2tuple("""{"name": "bob", "age": 30}""") + val names = tuple.getSchema.getAttributeNames.toSet + assert(names == Set("name", "age")) + assert(tuple.getField[Any]("name") == "bob") + // age is parsed via inferSchemaFromRows; the JSON literal 30 is unquoted, + // so a numeric type is expected — compare via toString rather than locking + // in the precise inferred AttributeType. + assert(tuple.getField[Any]("age").toString == "30") + } + + it should "round-trip a schema-and-values through tuple2json → json2tuple" in { + val schema = new Schema( + new Attribute("city", AttributeType.STRING), + new Attribute("score", AttributeType.INTEGER) + ) + val original = TupleUtils.tuple2json(schema, Array[Any]("Irvine", Int.box(42))).toString + val parsed = TupleUtils.json2tuple(original) + val reSerialized = + TupleUtils.tuple2json(parsed.getSchema, parsed.getFields.toArray.asInstanceOf[Array[Any]]) + // The exact column order isn't part of the json2tuple contract (it builds + // schemaFieldNames from a Set), so compare by JSON-tree equality. + val mapper = org.apache.texera.amber.util.JSONUtils.objectMapper + assert(mapper.readTree(reSerialized.toString) == mapper.readTree(original)) + } + + it should "drop non-object roots (e.g. a JSON array) into an empty tuple" in { + // The implementation only collects fields when the root `isObject`. 
A + // non-object root leaves `fieldNames` empty, so the result is a tuple + // over an empty schema with no fields — observed contract is no-throw, + // empty result. + val tuple = TupleUtils.json2tuple("""[1, 2, 3]""") + assert(tuple.getSchema.getAttributes.isEmpty) + assert(tuple.getFields.isEmpty) + } + + it should "throw when given malformed JSON" in { + intercept[Exception] { + TupleUtils.json2tuple("{ this is not json }") + } + } +} From 9e959ae0888b02b305c110da9ea33fd3a8455d1d Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Mon, 4 May 2026 00:14:07 -0700 Subject: [PATCH 2/2] test(workflow-core): clean up CollectionConverters usage in TupleUtilsSpec Address Copilot feedback on #4910: replace the awkward `scala.jdk.CollectionConverters.ListHasAsScala(keys).asScala` (which explicitly constructs the implicit-class wrapper) with the idiomatic Scala 2.13 form: `import scala.jdk.CollectionConverters._` and call `.asScala` directly on the returned `java.util.Iterator[String]`. Drops the intermediate `java.util.ArrayList` buffer entirely. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala b/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala index 13218276ec5..3ac9a50ac1d 100644 --- a/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala +++ b/common/workflow-core/src/test/scala/org/apache/texera/amber/core/tuple/TupleUtilsSpec.scala @@ -21,6 +21,8 @@ package org.apache.texera.amber.core.tuple import org.scalatest.flatspec.AnyFlatSpec +import scala.jdk.CollectionConverters._ + class TupleUtilsSpec extends AnyFlatSpec { // --- tuple2json ------------------------------------------------------------ @@ -33,9 +35,7 @@ class TupleUtilsSpec extends AnyFlatSpec { val node = TupleUtils.tuple2json(schema, Array[Any](Int.box(7), "alice")) // Field iteration order on Jackson ObjectNode follows insertion order, // which mirrors the schema's getAttributeNames order. - val keys = new java.util.ArrayList[String]() - node.fieldNames().forEachRemaining(k => keys.add(k)) - assert(scala.jdk.CollectionConverters.ListHasAsScala(keys).asScala.toList == List("id", "name")) + assert(node.fieldNames().asScala.toList == List("id", "name")) assert(node.get("id").asInt() == 7) assert(node.get("name").asText() == "alice") }