Skip to content

Commit

Permalink
cached parser and elimination of SchemaStore
Browse files Browse the repository at this point in the history
  • Loading branch information
Cory Parent authored and Cory Parent committed Dec 7, 2015
1 parent e309c47 commit 079be7b
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import com.typesafe.scalalogging._

object AvroTypeProviderMacro extends LazyLogging {

var parsedFiles: Vector[String] = Vector.empty[String]

def impl(c: Context)(annottees: c.Expr[Any]*): c.Expr[Any] = {
import c.universe._
import Flag._
Expand All @@ -38,11 +40,19 @@ object AvroTypeProviderMacro extends LazyLogging {
// helpful for IDE users who may not be able to easily see where their files live
logger.info(s"Current path: ${new File(".").getAbsolutePath}")

// get the schema for the record that this class represents
val avroFilePath = FilePathProbe.getPath(c)
val infile = new File(avroFilePath)
val fileSchemas = FileParser.getSchemas(infile)
val nestedSchemas = fileSchemas.flatMap(NestedSchemaExtractor.getNestedSchemas)
println("file: " + avroFilePath)
// get the schema for the record that this class represents
val nestedSchemas = if (!parsedFiles.contains(avroFilePath)) {
println("creating from file: " + avroFilePath)
val infile = new File(avroFilePath)
val fileSchemas = FileParser.getSchemas(infile)
parsedFiles = parsedFiles :+ avroFilePath
fileSchemas.flatMap(NestedSchemaExtractor.getNestedSchemas)
} else {
println("reusing file: " + avroFilePath)
FileParser.getDefinitionByName(fullName)
}
// first try matching schema record full name to class full name, then by the
// regular name in case we're trying to read from a non-namespaced schema
val classSchema = nestedSchemas.find(s => s.getFullName == fullName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import org.apache.avro.Schema.Type._

import scala.collection.JavaConverters._

object FileParser {
object FileParser extends Parser {

def getSchemas(infile: java.io.File): List[Schema] = {
val schema = infile.getName.split("\\.").last match {
Expand All @@ -18,18 +18,29 @@ object FileParser {
val dfr = new DataFileReader(infile, gdr)
dfr.getSchema
case "avsc" =>
new Parser().parse(infile)
this.parse(infile)
case _ => throw new Exception("Invalid file ending. Must be .avsc for plain text json files and .avro for binary files.")
}
getDefinition(schema)
}

/** Extracts the record schema(s) from a parsed Avro schema.
  *
  * @param schema a parsed Avro schema, expected to be a RECORD or a UNION
  *               containing at least one RECORD member
  * @return the record schemas found (a singleton list for a plain RECORD)
  * @note calls `sys.error` when no record type can be extracted
  */
def getDefinition(schema: Schema): List[Schema] = {
  schema.getType match {
    case UNION => {
      // keep only the record members of the union; other members cannot map to case classes
      val recordSchemas = schema.getTypes.asScala.toList.filter(_.getType == RECORD)
      if (recordSchemas.nonEmpty) recordSchemas
      // NOTE: reports the union's own full name, since no input file is in scope here
      else sys.error("no record type found in the union from " + schema.getFullName)
    }
    case RECORD => List(schema)
    case _ => sys.error("The Schema in the datafile is neither a record nor a union of a record type, nothing to map to case class.")
  }
}

/** Looks up a previously parsed schema by its fully qualified name and
  * returns its record definition(s) via [[getDefinition]].
  *
  * @param fullName fully qualified record name, e.g. "com.example.MyRecord"
  * @note calls `sys.error` when no schema has been parsed under that name
  */
def getDefinitionByName(fullName: String): List[Schema] = {
  // `getTypes` is the cache of named schemas accumulated by this Parser instance
  val knownTypes = this.getTypes
  if (!knownTypes.containsKey(fullName)) sys.error("The schema name provided has not been defined")
  else getDefinition(knownTypes.get(fullName))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package com.julianpeeters.avro.annotations
package record
package schemagen

import com.julianpeeters.avro.annotations.provider.FileParser

import scala.reflect.macros.blackbox.Context

import collection.JavaConversions._
Expand Down Expand Up @@ -53,7 +55,8 @@ abstract class FieldSchemaGenerator {
// if a case class (a nested record) is found, reuse the schema that was made and stored when its macro was expanded.
// unsuccessful alternatives: reflectively getting the schema from its companion (can't get a tree from a Symbol),
// or regenerating the schema (no way to get default param values from outside the current at compile time).
SchemaStore.schemas(x.toString)
println(x.toString)
FileParser.getTypes.get(x.toString)
}
case x => throw new UnsupportedOperationException("Could not generate schema. Cannot support yet: " + x )
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package com.julianpeeters.avro.annotations
package record
package schemagen

import com.julianpeeters.avro.annotations.provider.FileParser

import collection.JavaConversions._
import java.util.{Arrays => JArrays}
import org.apache.avro.Schema
Expand All @@ -11,13 +13,15 @@ import collection.JavaConversions._

object RecordSchemaGenerator {


/** Returns the Avro record schema for the given class.
  *
  * Reuses a schema already registered with the shared parser (e.g. one
  * parsed earlier from an .avsc file) when one exists under the same
  * fully qualified name; otherwise builds a new record schema from the
  * annotated fields and registers it so later expansions can reuse it.
  *
  * @param className  simple name of the case class
  * @param namespace  package/namespace for the record
  * @param avroFields the Avro fields generated from the class parameters
  * @return the cached or newly created `Schema`
  */
def createSchema(className: String, namespace: String, avroFields: List[Field]) = {
  // hoist the repeated name concatenation used for cache lookup and registration
  val fullName = namespace + "." + className
  if (FileParser.getTypes.containsKey(fullName)) {
    FileParser.getTypes.get(fullName)
  } else {
    val avroSchema = Schema.createRecord(className, "Auto-Generated Schema", namespace, false)
    avroSchema.setFields(JArrays.asList(avroFields.toArray:_*))
    // register so nested records and later macro expansions can find this schema
    FileParser.addTypes(Map(fullName -> avroSchema))
    avroSchema
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import collection.JavaConversions._
import org.apache.avro.Schema

object SchemaStore {

val schemas: scala.collection.concurrent.Map[String, Schema] = scala.collection.convert.Wrappers.JConcurrentMapWrapper(new ConcurrentHashMap[String, Schema]())

def accept(schema: Schema) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package com.julianpeeters.avro.annotations
package record
package schemagen

import com.julianpeeters.avro.annotations.provider.FileParser

import scala.reflect.macros.blackbox.Context

import org.codehaus.jackson.JsonNode
Expand Down Expand Up @@ -48,12 +50,12 @@ abstract class ToJsonMatcher {
jsonObject
}
// if the default value is another (i.e. nested) record/case class
case Apply(Ident(TermName(name)), xs) if SchemaStore.schemas.contains(ns + "." + name) => {
case Apply(Ident(TermName(name)), xs) if FileParser.getTypes.contains(ns + "." + name) => {
val jsonObject = jsonNodeFactory.objectNode
xs.zipWithIndex.map( x => {
val value = x._1
val index = x._2
val nestedRecordField = SchemaStore.schemas(ns + "." + name).getFields()(index)
val nestedRecordField = FileParser.getTypes.get(ns + "." + name).getFields()(index)
// values from the tree, field names from cross referencing tree's pos with schema field pos
// (they always correspond since the schema is defined based on the fields in a class def)
jsonObject.put(nestedRecordField.name, toJsonNode(value))
Expand Down
2 changes: 1 addition & 1 deletion tests/src/test/resources/separate/SeparateTestMessage.avsc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
{"name": "message", "type": "string"},
{
"name": "metaData",
"type": "test.SeperateMetadata"
"type": "test.SeparateMetaData"
}
]
}
10 changes: 5 additions & 5 deletions tests/src/test/scala/AvroTypeProviderTestClasses.scala
Original file line number Diff line number Diff line change
Expand Up @@ -352,11 +352,11 @@ case class TestMessage()
@AvroRecord
case class MetaData()

// nested record from separate schema files instead of the same schema file
@AvroTypeProvider("tests/src/test/resources/separate/SeparateTestMessage.avsc")
@AvroRecord
case class SeparateTestMessage()

// nested record from separate schema files instead of the same schema file
@AvroTypeProvider("tests/src/test/resources/separate/SeparateMetaData.avsc")
@AvroRecord
case class SeparateMetaData()

@AvroTypeProvider("tests/src/test/resources/separate/SeparateTestMessage.avsc")
@AvroRecord
case class SeparateTestMessage()
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class AvroTypeProviderNestedSeparateSchemaFilesTest extends Specification {

"A case class with types provided from two separate .avsc files" should {
"serialize and deserialize correctly" in {
val record = SeparteTestMessage("Achilles", SeparateMetaData("ow", "12345"))
val record = SeparateTestMessage("Achilles", SeparateMetaData("ow", "12345"))
TestUtil.verifyWriteAndRead(record)
}
}
Expand Down

0 comments on commit 079be7b

Please sign in to comment.