Skip to content

Commit

Permalink
AVRO-924. Java: Support reading & writing arbitrary JSON data using a…
Browse files Browse the repository at this point in the history
…n efficient Avro binary representation.

git-svn-id: https://svn.apache.org/repos/asf/avro/trunk@1185364 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
cutting committed Oct 17, 2011
1 parent ee35b21 commit 60c0c54
Show file tree
Hide file tree
Showing 6 changed files with 235 additions and 1 deletion.
3 changes: 3 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ Avro 1.6.0 (unreleased)
achieved by specifying <stringType>String</stringType> in
avro-maven-plugin's pom.xml configuration. (cutting)

AVRO-924. Java: Support reading & writing arbitrary JSON data
using an efficient Avro binary representation. (cutting)

OPTIMIZATIONS

AVRO-853: Java: Cache Schema hash codes. (cutting)
Expand Down
8 changes: 8 additions & 0 deletions lang/java/avro/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@
</dependencies>

<build>
<resources>
<resource>
<directory>../../../share/schemas</directory>
<includes>
<include>org/apache/avro/data/Json.avsc</include>
</includes>
</resource>
</resources>
<plugins>
<plugin>
<groupId>com.thoughtworks.paranamer</groupId>
Expand Down
184 changes: 184 additions & 0 deletions lang/java/avro/src/main/java/org/apache/avro/data/Json.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.data;

import java.io.IOException;
import java.util.Iterator;

import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.JsonNodeFactory;
import org.codehaus.jackson.node.LongNode;
import org.codehaus.jackson.node.DoubleNode;
import org.codehaus.jackson.node.TextNode;
import org.codehaus.jackson.node.BooleanNode;
import org.codehaus.jackson.node.NullNode;
import org.codehaus.jackson.node.ArrayNode;
import org.codehaus.jackson.node.ObjectNode;

import org.apache.avro.Schema;
import org.apache.avro.AvroRuntimeException;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.ResolvingDecoder;

/**
 * Utilities for reading and writing arbitrary Json data in Avro format.
 *
 * <p>Each Json value is written as a union branch index (see
 * {@link JsonType}) followed by the payload for that branch.  Arrays and
 * objects recurse, so any Json tree can be represented.
 */
public class Json {
  private Json() {}                               // utility class: no instances

  /** Classpath location of the schema resource parsed into {@link #SCHEMA}. */
  private static final String SCHEMA_RESOURCE = "/org/apache/avro/data/Json.avsc";

  /** The schema for Json data. */
  public static final Schema SCHEMA;
  static {
    java.io.InputStream in = Json.class.getResourceAsStream(SCHEMA_RESOURCE);
    if (in == null)                               // fail with a readable message
      throw new AvroRuntimeException
        ("Json schema resource not found: "+SCHEMA_RESOURCE);
    try {
      try {
        SCHEMA = Schema.parse(in);
      } finally {
        in.close();                               // don't leak the resource stream
      }
    } catch (IOException e) {
      throw new AvroRuntimeException(e);
    }
  }

  /** {@link DatumWriter} for arbitrary Json data. */
  public static class Writer implements DatumWriter<JsonNode> {

    /**
     * Accepts only the Json schema.
     * @throws RuntimeException if <code>schema</code> is not equal to
     * {@link Json#SCHEMA}
     */
    @Override public void setSchema(Schema schema) {
      if (!SCHEMA.equals(schema))
        throw new RuntimeException("Not the Json schema: "+schema);
    }

    /** Writes <code>datum</code> using {@link Json#write(JsonNode,Encoder)}. */
    @Override
    public void write(JsonNode datum, Encoder out) throws IOException {
      Json.write(datum, out);
    }
  }

  /** {@link DatumReader} for arbitrary Json data. */
  public static class Reader implements DatumReader<JsonNode> {
    /** Schema the data was written with, or null if it is {@link Json#SCHEMA}. */
    private Schema written;
    /** Lazily-created resolver from {@link #written} to {@link Json#SCHEMA}. */
    private ResolvingDecoder resolver;

    /**
     * Sets the schema the data was written with.  When it equals
     * {@link Json#SCHEMA} no schema resolution is needed and data is read
     * directly.
     */
    @Override public void setSchema(Schema schema) {
      // compare the *argument*, not the previous value of the field
      this.written = SCHEMA.equals(schema) ? null : schema;
      // a resolver built for a previous writer schema is no longer valid
      this.resolver = null;
    }

    /**
     * Reads one Json value.  The <code>reuse</code> argument is ignored; a
     * new node is always returned.
     */
    @Override
    public JsonNode read(JsonNode reuse, Decoder in) throws IOException {
      if (written == null)                        // same schema
        return Json.read(in);

      // use a resolver to adapt alternate version of Json schema
      if (resolver == null)
        resolver = DecoderFactory.get().resolvingDecoder(written, SCHEMA, null);
      resolver.configure(in);
      JsonNode result = Json.read(resolver);
      resolver.drain();
      return result;
    }
  }

  /** Note: this enum must be kept aligned with the union in Json.avsc. */
  private enum JsonType { LONG, DOUBLE, STRING, BOOLEAN, NULL, ARRAY, OBJECT }

  /** Cached so that {@link #read(Decoder)} doesn't clone the array per node. */
  private static final JsonType[] JSON_TYPES = JsonType.values();

  /**
   * Write Json data as Avro data.
   *
   * <p>The node's token type selects the union branch: its index is written
   * first, then the value.  Arrays and objects are written as a single block
   * whose elements are written recursively.
   *
   * @param node the Json value to write
   * @param out the encoder to write to
   * @throws AvroRuntimeException if the node's token is not a Json value,
   * array or object
   */
  public static void write(JsonNode node, Encoder out) throws IOException {
    switch(node.asToken()) {
    case VALUE_NUMBER_INT:
      out.writeIndex(JsonType.LONG.ordinal());
      // NOTE(review): integers outside the range of long presumably do not
      // survive getLongValue() -- confirm against Jackson's documentation
      out.writeLong(node.getLongValue());
      break;
    case VALUE_NUMBER_FLOAT:
      out.writeIndex(JsonType.DOUBLE.ordinal());
      out.writeDouble(node.getDoubleValue());
      break;
    case VALUE_STRING:
      out.writeIndex(JsonType.STRING.ordinal());
      out.writeString(node.getTextValue());
      break;
    case VALUE_TRUE:
      out.writeIndex(JsonType.BOOLEAN.ordinal());
      out.writeBoolean(true);
      break;
    case VALUE_FALSE:
      out.writeIndex(JsonType.BOOLEAN.ordinal());
      out.writeBoolean(false);
      break;
    case VALUE_NULL:
      out.writeIndex(JsonType.NULL.ordinal());
      out.writeNull();
      break;
    case START_ARRAY:
      out.writeIndex(JsonType.ARRAY.ordinal());
      out.writeArrayStart();
      out.setItemCount(node.size());
      for (JsonNode element : node) {
        out.startItem();
        write(element, out);
      }
      out.writeArrayEnd();
      break;
    case START_OBJECT:
      out.writeIndex(JsonType.OBJECT.ordinal());
      out.writeMapStart();
      out.setItemCount(node.size());
      Iterator<String> names = node.getFieldNames();
      while (names.hasNext()) {
        out.startItem();
        String name = names.next();
        out.writeString(name);                    // map key
        write(node.get(name), out);               // map value
      }
      out.writeMapEnd();
      break;
    default:
      throw new AvroRuntimeException(node.asToken()+" unexpected: "+node);
    }
  }

  /**
   * Read Json data from Avro data.
   *
   * <p>Reads a union branch index, then the value for that branch.  Arrays
   * and objects are read block by block, with their elements read
   * recursively.
   *
   * @param in the decoder to read from
   * @return a newly created node (or a shared constant for booleans and null)
   */
  public static JsonNode read(Decoder in) throws IOException {
    switch (JSON_TYPES[in.readIndex()]) {
    case LONG:
      return new LongNode(in.readLong());
    case DOUBLE:
      return new DoubleNode(in.readDouble());
    case STRING:
      return new TextNode(in.readString());
    case BOOLEAN:
      return in.readBoolean() ? BooleanNode.TRUE : BooleanNode.FALSE;
    case NULL:
      in.readNull();
      return NullNode.getInstance();
    case ARRAY:
      ArrayNode array = JsonNodeFactory.instance.arrayNode();
      // outer loop: one iteration per block; inner loop: items in the block
      for (long count = in.readArrayStart(); count > 0; count = in.arrayNext())
        for (long i = 0; i < count; i++)
          array.add(read(in));
      return array;
    case OBJECT:
      ObjectNode object = JsonNodeFactory.instance.objectNode();
      // outer loop: one iteration per block; inner loop: entries in the block
      for (long count = in.readMapStart(); count > 0; count = in.mapNext())
        for (long i = 0; i < count; i++)
          object.put(in.readString(), read(in));
      return object;
    default:
      throw new AvroRuntimeException("Unexpected Json node type");
    }
  }

}
1 change: 1 addition & 0 deletions lang/java/ipc/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
<configuration>
<excludes>
<exclude>**/mapred/tether/**</exclude>
<exclude>org/apache/avro/data/Json.avsc</exclude>
</excludes>
<stringType>String</stringType>
<sourceDirectory>${parent.project.basedir}/../../../../share/schemas/</sourceDirectory>
Expand Down
25 changes: 24 additions & 1 deletion lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
import java.util.List;
import java.util.Collection;

import org.codehaus.jackson.JsonNode;

import org.apache.avro.Schema.Type;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericData;
Expand All @@ -45,6 +47,7 @@
import org.apache.avro.io.Decoder;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.data.Json;
import org.apache.avro.compiler.specific.TestSpecificCompiler;
import org.apache.avro.util.Utf8;

Expand Down Expand Up @@ -457,7 +460,6 @@ public void testNamespaceNesting() throws Exception {
+"{\"name\":\"f\",\"type\":"+y+"}"
+"]}";
Schema xs = Schema.parse(x);
System.out.println(xs);
assertEquals(xs, Schema.parse(xs.toString()));
}

Expand Down Expand Up @@ -610,6 +612,9 @@ private static void check(String jsonSchema, boolean induce)

// Check that we can generate the code for every schema we see.
TestSpecificCompiler.assertCompiles(schema, false);

// Check that we can read/write the json of every schema we see.
checkBinaryJson(jsonSchema);
}
}

Expand Down Expand Up @@ -734,6 +739,24 @@ private static void checkJson(Schema schema, Object datum,
assertEquals("Decoded data does not match.", datum, decoded);
}

/**
 * Checks that Json text survives a round trip through the binary Json
 * encoding: parse, write with {@link Json.Writer}, read back with
 * {@link Json.Reader}, and compare the string forms of both trees.
 * Both directions go through a validating codec for {@link Json#SCHEMA}.
 */
public static void checkBinaryJson(String json) throws Exception {
  JsonNode expected = Schema.parseJson(json);

  // encode: binary encoder wrapped in a validator for the Json schema
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  DatumWriter<JsonNode> jsonWriter = new Json.Writer();
  Encoder binaryOut = EncoderFactory.get().binaryEncoder(buffer, null);
  Encoder validatingOut =
    EncoderFactory.get().validatingEncoder(Json.SCHEMA, binaryOut);
  jsonWriter.write(expected, validatingOut);
  validatingOut.flush();
  byte[] encoded = buffer.toByteArray();

  // decode: binary decoder wrapped in a validator for the Json schema
  DatumReader<JsonNode> jsonReader = new Json.Reader();
  Decoder binaryIn = DecoderFactory.get().binaryDecoder(encoded, null);
  Decoder validatingIn =
    DecoderFactory.get().validatingDecoder(Json.SCHEMA, binaryIn);
  JsonNode actual = jsonReader.read(null, validatingIn);

  assertEquals("Decoded json does not match.",
               expected.toString(), actual.toString());
}

/** Schema of a record named "Foo" that declares no fields. */
private static final Schema ACTUAL = // an empty record schema
Schema.parse("{\"type\":\"record\", \"name\":\"Foo\", \"fields\":[]}");

Expand Down
15 changes: 15 additions & 0 deletions share/schemas/org/apache/avro/data/Json.avsc
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{"type": "record", "name": "Json", "namespace":"org.apache.avro.data",
"fields": [
{"name": "value",
"type": [
"long",
"double",
"string",
"boolean",
"null",
{"type": "array", "items": "Json"},
{"type": "map", "values": "Json"}
]
}
]
}

0 comments on commit 60c0c54

Please sign in to comment.