From 70ceb84910ae5e7ca8575f72935f2fce41c9c3cb Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 3 Aug 2018 00:16:19 +0800 Subject: [PATCH] revise avro namespace --- .../spark/sql/avro/SchemaConverters.scala | 6 +++++- .../org/apache/spark/sql/avro/AvroSuite.scala | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala index 87fae63aeff2b..dd24683cd0dd6 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala @@ -124,7 +124,11 @@ object SchemaConverters { case MapType(StringType, vt, valueContainsNull) => builder.map().values(toAvroType(vt, valueContainsNull, recordName, prevNameSpace)) case st: StructType => - val nameSpace = s"$prevNameSpace.$recordName" + val nameSpace = prevNameSpace match { + case "" => recordName + case _ => s"$prevNameSpace.$recordName" + } + val fieldsAssembler = builder.record(recordName).namespace(nameSpace).fields() st.foreach { f => val fieldAvroType = toAvroType(f.dataType, f.nullable, f.name, nameSpace) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index c221c4fd07de7..77cfce953048a 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -795,6 +795,23 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils { case class NestedTop(id: Int, data: NestedMiddle) + test("Validate namespace in avro file that has nested records with the same name") { + withTempPath { dir => + val writeDf = spark.createDataFrame(List(NestedTop(1, NestedMiddle(2, NestedBottom(3, "1"))))) + writeDf.write.format("avro").save(dir.toString) + val file = new File(dir.toString) + .listFiles() + .filter(_.isFile) + .filter(_.getName.endsWith("avro")) + .head + val reader = new DataFileReader(file, new GenericDatumReader[Any]()) + val schema = reader.getSchema.toString() + assert(schema.contains("\"namespace\":\"topLevelRecord\"")) + assert(schema.contains("\"namespace\":\"topLevelRecord.data\"")) + assert(schema.contains("\"namespace\":\"topLevelRecord.data.data\"")) + } + } + test("saving avro that has nested records with the same name") { withTempPath { tempDir => // Save avro file on output folder path