Skip to content

Commit

Permalink
cached parser and elimination of SchemaStore
Browse files Browse the repository at this point in the history
  • Loading branch information
Cory Parent authored and Cory Parent committed Dec 7, 2015
1 parent e309c47 commit 079be7b
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import com.typesafe.scalalogging._

object AvroTypeProviderMacro extends LazyLogging {

var parsedFiles: Vector[String] = Vector.empty[String]

def impl(c: Context)(annottees: c.Expr[Any]*): c.Expr[Any] = {
import c.universe._
import Flag._
Expand All @@ -38,11 +40,19 @@ object AvroTypeProviderMacro extends LazyLogging {
// helpful for IDE users who may not be able to easily see where their files live
logger.info(s"Current path: ${new File(".").getAbsolutePath}")

// get the schema for the record that this class represents
val avroFilePath = FilePathProbe.getPath(c)
val infile = new File(avroFilePath)
val fileSchemas = FileParser.getSchemas(infile)
val nestedSchemas = fileSchemas.flatMap(NestedSchemaExtractor.getNestedSchemas)
println("file: " + avroFilePath)
// get the schema for the record that this class represents
val nestedSchemas = if (!parsedFiles.contains(avroFilePath)) {
println("creating from file: " + avroFilePath)
val infile = new File(avroFilePath)
val fileSchemas = FileParser.getSchemas(infile)
parsedFiles = parsedFiles :+ avroFilePath
fileSchemas.flatMap(NestedSchemaExtractor.getNestedSchemas)
} else {
println("reusing file: " + avroFilePath)
FileParser.getDefinitionByName(fullName)
}
// first try matching schema record full name to class full name, then by the
// regular name in case we're trying to read from a non-namespaced schema
val classSchema = nestedSchemas.find(s => s.getFullName == fullName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import org.apache.avro.Schema.Type._

import scala.collection.JavaConverters._

object FileParser {
object FileParser extends Parser {

def getSchemas(infile: java.io.File): List[Schema] = {
val schema = infile.getName.split("\\.").last match {
Expand All @@ -18,18 +18,29 @@ object FileParser {
val dfr = new DataFileReader(infile, gdr)
dfr.getSchema
case "avsc" =>
new Parser().parse(infile)
this.parse(infile)
case _ => throw new Exception("Invalid file ending. Must be .avsc for plain text json files and .avro for binary files.")
}
getDefinition(schema)
}

/** Extracts the record schema(s) from a parsed Avro schema.
  *
  * @param schema a parsed Avro schema, expected to be a RECORD or a UNION
  *               containing at least one RECORD member
  * @return the record schemas found (a singleton list for a plain RECORD)
  * @note calls `sys.error` when no record type can be extracted
  */
def getDefinition(schema: Schema): List[Schema] = {
  schema.getType match {
    case UNION => {
      // keep only the record members of the union; other members cannot map to case classes
      val recordSchemas = schema.getTypes.asScala.toList.filter(_.getType == RECORD)
      if (recordSchemas.nonEmpty) recordSchemas
      // NOTE: reports the union's own full name, since no input file is in scope here
      else sys.error("no record type found in the union from " + schema.getFullName)
    }
    case RECORD => List(schema)
    case _ => sys.error("The Schema in the datafile is neither a record nor a union of a record type, nothing to map to case class.")
  }
}

/** Looks up a previously parsed schema by its fully qualified name and
  * returns its record definition(s) via [[getDefinition]].
  *
  * @param fullName fully qualified record name, e.g. "com.example.MyRecord"
  * @note calls `sys.error` when no schema has been parsed under that name
  */
def getDefinitionByName(fullName: String): List[Schema] = {
  // `getTypes` is the cache of named schemas accumulated by this Parser instance
  val knownTypes = this.getTypes
  if (!knownTypes.containsKey(fullName)) sys.error("The schema name provided has not been defined")
  else getDefinition(knownTypes.get(fullName))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package com.julianpeeters.avro.annotations
package record
package schemagen

import com.julianpeeters.avro.annotations.provider.FileParser

import scala.reflect.macros.blackbox.Context

import collection.JavaConversions._
Expand Down Expand Up @@ -53,7 +55,8 @@ abstract class FieldSchemaGenerator {
// if a case class (a nested record) is found, reuse the schema that was made and stored when its macro was expanded.
// unsuccessful alternatives: reflectively getting the schema from its companion (can't get a tree from a Symbol),
// or regenerating the schema (no way to get default param values from outside the current at compile time).
SchemaStore.schemas(x.toString)
println(x.toString)
FileParser.getTypes.get(x.toString)
}
case x => throw new UnsupportedOperationException("Could not generate schema. Cannot support yet: " + x )
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package com.julianpeeters.avro.annotations
package record
package schemagen

import com.julianpeeters.avro.annotations.provider.FileParser

import collection.JavaConversions._
import java.util.{Arrays => JArrays}
import org.apache.avro.Schema
Expand All @@ -11,13 +13,15 @@ import collection.JavaConversions._

object RecordSchemaGenerator {


/** Returns the Avro record schema for the given class.
  *
  * Reuses a schema already registered with the shared parser (e.g. one
  * parsed earlier from an .avsc file) when one exists under the same
  * fully qualified name; otherwise builds a new record schema from the
  * annotated fields and registers it so later expansions can reuse it.
  *
  * @param className  simple name of the case class
  * @param namespace  package/namespace for the record
  * @param avroFields the Avro fields generated from the class parameters
  * @return the cached or newly created `Schema`
  */
def createSchema(className: String, namespace: String, avroFields: List[Field]) = {
  // hoist the repeated name concatenation used for cache lookup and registration
  val fullName = namespace + "." + className
  if (FileParser.getTypes.containsKey(fullName)) {
    FileParser.getTypes.get(fullName)
  } else {
    val avroSchema = Schema.createRecord(className, "Auto-Generated Schema", namespace, false)
    avroSchema.setFields(JArrays.asList(avroFields.toArray:_*))
    // register so nested records and later macro expansions can find this schema
    FileParser.addTypes(Map(fullName -> avroSchema))
    avroSchema
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import collection.JavaConversions._
import org.apache.avro.Schema

object SchemaStore {

val schemas: scala.collection.concurrent.Map[String, Schema] = scala.collection.convert.Wrappers.JConcurrentMapWrapper(new ConcurrentHashMap[String, Schema]())

def accept(schema: Schema) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package com.julianpeeters.avro.annotations
package record
package schemagen

import com.julianpeeters.avro.annotations.provider.FileParser

import scala.reflect.macros.blackbox.Context

import org.codehaus.jackson.JsonNode
Expand Down Expand Up @@ -48,12 +50,12 @@ abstract class ToJsonMatcher {
jsonObject
}
// if the default value is another (i.e. nested) record/case class
case Apply(Ident(TermName(name)), xs) if SchemaStore.schemas.contains(ns + "." + name) => {
case Apply(Ident(TermName(name)), xs) if FileParser.getTypes.contains(ns + "." + name) => {
val jsonObject = jsonNodeFactory.objectNode
xs.zipWithIndex.map( x => {
val value = x._1
val index = x._2
val nestedRecordField = SchemaStore.schemas(ns + "." + name).getFields()(index)
val nestedRecordField = FileParser.getTypes.get(ns + "." + name).getFields()(index)
// values from the tree, field names from cross referencing tree's pos with schema field pos
// (they always correspond since the schema is defined based on the fields in a class def)
jsonObject.put(nestedRecordField.name, toJsonNode(value))
Expand Down
2 changes: 1 addition & 1 deletion tests/src/test/resources/separate/SeparateTestMessage.avsc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
{"name": "message", "type": "string"},
{
"name": "metaData",
"type": "test.SeperateMetadata"
"type": "test.SeparateMetaData"
}
]
}
10 changes: 5 additions & 5 deletions tests/src/test/scala/AvroTypeProviderTestClasses.scala
Original file line number Diff line number Diff line change
Expand Up @@ -352,11 +352,11 @@ case class TestMessage()
@AvroRecord
case class MetaData()

// nested record from separate schema files instead of the same schema file
@AvroTypeProvider("tests/src/test/resources/separate/SeparateTestMessage.avsc")
@AvroRecord
case class SeparateTestMessage()

// nested record from separate schema files instead of the same schema file
@AvroTypeProvider("tests/src/test/resources/separate/SeparateMetaData.avsc")
@AvroRecord
case class SeparateMetaData()

@AvroTypeProvider("tests/src/test/resources/separate/SeparateTestMessage.avsc")
@AvroRecord
case class SeparateTestMessage()
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class AvroTypeProviderNestedSeparateSchemaFilesTest extends Specification {

"A case class with types provided from two separate .avsc files" should {
"serialize and deserialize correctly" in {
val record = SeparteTestMessage("Achilles", SeparateMetaData("ow", "12345"))
val record = SeparateTestMessage("Achilles", SeparateMetaData("ow", "12345"))
TestUtil.verifyWriteAndRead(record)
}
}
Expand Down

0 comments on commit 079be7b

Please sign in to comment.