Skip to content

Commit

Permalink
AVRO-3677: Add SchemaFormatter (#2885)
Browse files Browse the repository at this point in the history
* AVRO-3677: Introduce Named Schema Formatters

Adds a SchemaFormatter interface and factory method to format schemas to
different formats by name. The initial implementation supports JSON
(both inline and pretty printed), the parsing canonical form, and the IDL
format.
  • Loading branch information
opwvhk committed May 6, 2024
1 parent 9f9023c commit 362aef8
Show file tree
Hide file tree
Showing 19 changed files with 1,449 additions and 7 deletions.
2 changes: 1 addition & 1 deletion doc/themes/docsy
Submodule docsy updated 271 files
3 changes: 3 additions & 0 deletions lang/java/avro/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@
<include>org/apache/avro/data/Json.avsc</include>
</includes>
</resource>
<resource>
<directory>src/main/resources</directory>
</resource>
</resources>
<testResources>
<testResource>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro;

/**
 * Schema formatter factory for the "Parsing Canonical Form".
 *
 * <p>
 * The class plays both roles at once: it is the factory and also the one
 * formatter that factory hands out, so no separate formatter object is ever
 * created.
 * </p>
 *
 * @see <a href=
 *      "https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas">Specification:
 *      Parsing Canonical Form for Schemas</a>
 */
public class CanonicalSchemaFormatterFactory implements SchemaFormatterFactory, SchemaFormatter {
  /**
   * Render the schema in its Parsing Canonical Form.
   *
   * @param schema the schema to format
   * @return the text produced by
   *         {@link SchemaNormalization#toParsingForm(Schema)}
   */
  @Override
  public String format(Schema schema) {
    final String canonicalForm = SchemaNormalization.toParsingForm(schema);
    return canonicalForm;
  }

  /**
   * Provide the formatter used when no variant is requested.
   *
   * @return this instance, as it is its own formatter
   */
  @Override
  public SchemaFormatter getDefaultFormatter() {
    return this;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro;

/**
 * Schema formatter that renders a schema as JSON, either pretty printed or on
 * a single line, depending on the flag passed at construction.
 */
public class JsonSchemaFormatter implements SchemaFormatter {
  /** Whether the JSON output is pretty printed. */
  private final boolean pretty;

  /**
   * Create a JSON schema formatter.
   *
   * @param prettyPrinted if true, the formatter pretty-prints the JSON
   */
  public JsonSchemaFormatter(boolean prettyPrinted) {
    pretty = prettyPrinted;
  }

  /**
   * Render the schema as JSON by delegating to {@link Schema#toString(boolean)}.
   *
   * @param schema the schema to format
   * @return the JSON text of the schema
   */
  @Override
  public String format(Schema schema) {
    // TODO: Move the toString implementation here and have Schema#toString()
    // use SchemaFormatter with the formats "json/pretty" and "json/inline"
    final String json = schema.toString(pretty);
    return json;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro;

/**
 * Schema formatter factory for the JSON format. It knows two variants:
 * {@value #VARIANT_NAME_PRETTY} (used as the default) and
 * {@value #VARIANT_NAME_INLINE}.
 */
public class JsonSchemaFormatterFactory implements SchemaFormatterFactory {

  /** Variant name selecting pretty printed JSON. */
  public static final String VARIANT_NAME_PRETTY = "pretty";
  /** Variant name selecting JSON that is not pretty printed. */
  public static final String VARIANT_NAME_INLINE = "inline";

  /**
   * Provide the formatter used when no variant is requested.
   *
   * @return the formatter for the {@value #VARIANT_NAME_PRETTY} variant
   */
  @Override
  public SchemaFormatter getDefaultFormatter() {
    return getFormatterForVariant(VARIANT_NAME_PRETTY);
  }

  /**
   * Provide the formatter for a named variant.
   *
   * @param variantName the variant name; compared exactly (case-sensitive)
   *                    against the two variant constants
   * @return a new {@link JsonSchemaFormatter} for the variant
   * @throws AvroRuntimeException if the variant name is neither of the two
   *                              known names (this includes {@code null})
   */
  @Override
  public SchemaFormatter getFormatterForVariant(String variantName) {
    final boolean pretty = VARIANT_NAME_PRETTY.equals(variantName);
    // Anything that is neither "pretty" nor "inline" is rejected.
    if (!pretty && !VARIANT_NAME_INLINE.equals(variantName)) {
      throw new AvroRuntimeException("Unknown JSON variant: " + variantName);
    }
    return new JsonSchemaFormatter(pretty);
  }
}
24 changes: 23 additions & 1 deletion lang/java/avro/src/main/java/org/apache/avro/Schema.java
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,16 @@ public int getFixedSize() {
throw new AvroRuntimeException("Not fixed: " + this);
}

/** Render this as <a href="https://json.org/">JSON</a>. */
/**
* <p>
* Render this as <a href="https://json.org/">JSON</a>.
* </p>
*
* <p>
* This method is equivalent to:
* {@code SchemaFormatter.getInstance("json/inline").format(this)}
* </p>
*/
@Override
public String toString() {
return toString(false);
Expand All @@ -403,7 +412,10 @@ public String toString() {
* Render this as <a href="https://json.org/">JSON</a>.
*
* @param pretty if true, pretty-print JSON.
* @deprecated Use {@link SchemaFormatter#format(Schema)} instead, using the
* format {@code json/pretty} or {@code json/inline}
*/
@Deprecated
public String toString(boolean pretty) {
  // Delegate with a fresh, empty set of known names.
  Set<String> knownNames = new HashSet<>();
  return toString(knownNames, pretty);
}
Expand All @@ -427,6 +439,7 @@ public String toString(Collection<Schema> referencedSchemas, boolean pretty) {
return toString(knownNames, pretty);
}

@Deprecated
String toString(Set<String> knownNames, boolean pretty) {
try {
StringWriter writer = new StringWriter();
Expand All @@ -441,6 +454,7 @@ String toString(Set<String> knownNames, boolean pretty) {
}
}

@Deprecated
void toJson(Set<String> knownNames, String namespace, JsonGenerator gen) throws IOException {
if (!hasProps()) { // no props defined
gen.writeString(getName()); // just write name
Expand All @@ -452,6 +466,7 @@ void toJson(Set<String> knownNames, String namespace, JsonGenerator gen) throws
}
}

/**
 * Base implementation: always fails with a "Not a record" error. Schema types
 * that do have fields override this method.
 *
 * @throws AvroRuntimeException always, in this implementation
 */
@Deprecated
void fieldsToJson(Set<String> knownNames, String namespace, JsonGenerator gen) throws IOException {
  final String message = "Not a record: " + this;
  throw new AvroRuntimeException(message);
}
Expand Down Expand Up @@ -1012,6 +1027,7 @@ int computeHash() {
}

@Override
@Deprecated
void toJson(Set<String> knownNames, String currentNamespace, JsonGenerator gen) throws IOException {
if (writeNameRef(knownNames, currentNamespace, gen))
return;
Expand All @@ -1033,6 +1049,7 @@ void toJson(Set<String> knownNames, String currentNamespace, JsonGenerator gen)
}

@Override
@Deprecated
void fieldsToJson(Set<String> knownNames, String namespace, JsonGenerator gen) throws IOException {
gen.writeStartArray();
for (Field f : fields) {
Expand Down Expand Up @@ -1120,6 +1137,7 @@ int computeHash() {
}

@Override
@Deprecated
void toJson(Set<String> knownNames, String currentNamespace, JsonGenerator gen) throws IOException {
if (writeNameRef(knownNames, currentNamespace, gen))
return;
Expand Down Expand Up @@ -1169,6 +1187,7 @@ int computeHash() {
}

@Override
@Deprecated
void toJson(Set<String> knownNames, String namespace, JsonGenerator gen) throws IOException {
gen.writeStartObject();
gen.writeStringField("type", "array");
Expand Down Expand Up @@ -1208,6 +1227,7 @@ int computeHash() {
}

@Override
@Deprecated
void toJson(Set<String> knownNames, String currentNamespace, JsonGenerator gen) throws IOException {
gen.writeStartObject();
gen.writeStringField("type", "map");
Expand Down Expand Up @@ -1285,6 +1305,7 @@ public void addProp(String name, String value) {
}

@Override
@Deprecated
void toJson(Set<String> knownNames, String currentNamespace, JsonGenerator gen) throws IOException {
gen.writeStartArray();
for (Schema type : types)
Expand Down Expand Up @@ -1329,6 +1350,7 @@ int computeHash() {
}

@Override
@Deprecated
void toJson(Set<String> knownNames, String currentNamespace, JsonGenerator gen) throws IOException {
if (writeNameRef(knownNames, currentNamespace, gen))
return;
Expand Down
127 changes: 127 additions & 0 deletions lang/java/avro/src/main/java/org/apache/avro/SchemaFormatter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro;

import java.util.Locale;
import java.util.ServiceLoader;

/**
* Interface and factory to format schemas to text.
*
* <p>
* Schema formats have a name, and optionally a variant (all lowercase). The
* Avro library supports a few formats out of the box:
* </p>
*
* <dl>
*
* <dt>{@code json}</dt>
* <dd>Classic schema definition (which is a form of JSON). Supports the
* variants {@code pretty} (the default) and {@code inline}. Can be written as
* .avsc files. See the specification (<a href=
* "https://avro.apache.org/docs/current/specification/#schema-declaration">Schema
* Declaration</a>) for more details.</dd>
*
* <dt>{@code canonical}</dt>
* <dd>Parsing Canonical Form; this uniquely defines how Avro data is written.
* Used to generate schema fingerprints.<br>
* See the specification (<a href=
* "https://avro.apache.org/docs/current/specification/#parsing-canonical-form-for-schemas">Parsing
* Canonical Form for Schemas</a>) for more details.</dd>
*
* <dt>{@code idl}</dt>
* <dd>IDL: a format that looks much like source code, and is arguably easier to
* read than JSON. Available when the module {@code avro-idl} is on the
* classpath. See
* <a href="https://avro.apache.org/docs/current/idl-language/">IDL Language</a>
* for more details.</dd>
*
* </dl>
*
* <p>
* Additional formats can be defined by implementing
* {@link SchemaFormatterFactory}.
* </p>
*
* @see <a href=
* "https://avro.apache.org/docs/current/specification/#schema-declaration">Specification:
* Schema Declaration</a>
* @see <a href=
* "https://avro.apache.org/docs/current/specification/#parsing-canonical-form-for-schemas">Specification:
* Parsing Canonical Form for Schemas</a>
* @see <a href="https://avro.apache.org/docs/current/idl-language/">IDL
* Language</a>
*/
public interface SchemaFormatter {
  /**
   * Get the schema formatter for the specified format name with optional variant.
   *
   * <p>
   * The part before the first {@code /} is the format name; it is matched
   * case-insensitively against the format names of the
   * {@link SchemaFormatterFactory} implementations found by the
   * {@link java.util.ServiceLoader}, and the first matching factory wins. The
   * part after the first {@code /}, if any, is the variant; it is converted to
   * lowercase before being handed to the factory.
   * </p>
   *
   * <p>
   * This method may be called from multiple threads: access to the shared
   * service loader is synchronized.
   * </p>
   *
   * @param name a format with optional variant, for example "json/pretty",
   *             "canonical" or "idl"
   * @return the schema formatter for the specified format
   * @throws AvroRuntimeException if the schema format is not supported
   * @throws NullPointerException if {@code name} is {@code null}
   */
  static SchemaFormatter getInstance(String name) {
    int slashPos = name.indexOf('/');
    // SchemaFormatterFactory.getFormatterForVariant(String) receives the name of
    // the variant in lowercase (as stated in its javadoc). We're doing a
    // case-insensitive comparison on the format name instead, so we don't have to
    // convert the format name provided by the factory to lower case.
    // This ensures the least amount of assumptions about implementations.
    String formatName = slashPos < 0 ? name : name.substring(0, slashPos);
    String variantName = slashPos < 0 ? null : name.substring(slashPos + 1).toLowerCase(Locale.ROOT);

    SchemaFormatterFactory matchingFactory = null;
    // ServiceLoader instances are documented as not safe for use by multiple
    // concurrent threads, and its iterator loads providers lazily (mutating
    // the loader). As the loader is shared by all callers, the iteration must
    // be guarded. Only the lookup happens under the lock; the formatter itself
    // is created after releasing it.
    synchronized (SchemaFormatterCache.LOADER) {
      for (SchemaFormatterFactory formatterFactory : SchemaFormatterCache.LOADER) {
        if (formatName.equalsIgnoreCase(formatterFactory.formatName())) {
          matchingFactory = formatterFactory;
          break;
        }
      }
    }

    if (matchingFactory == null) {
      throw new AvroRuntimeException("Unsupported schema format: " + name + "; see the javadoc for valid examples");
    }
    if (variantName == null) {
      return matchingFactory.getDefaultFormatter();
    }
    return matchingFactory.getFormatterForVariant(variantName);
  }

  /**
   * Format a schema with the specified format. Shorthand for
   * {@code getInstance(name).format(schema)}.
   *
   * @param name   the name of the schema format
   * @param schema the schema to format
   * @return the formatted schema
   * @throws AvroRuntimeException if the schema format is not supported
   * @see #getInstance(String)
   * @see #format(Schema)
   */
  static String format(String name, Schema schema) {
    return getInstance(name).format(schema);
  }

  /**
   * Write the specified schema as a String.
   *
   * @param schema the schema to write
   * @return the formatted schema
   */
  String format(Schema schema);
}

/**
 * Holder for the service loader that discovers {@link SchemaFormatterFactory}
 * implementations. Keeping the loader in a separate, package-private class
 * (instead of in a field of the {@link SchemaFormatter} interface, which would
 * be public) hides it from the API, and means it is only created when this
 * class is first initialized.
 *
 * <p>
 * Note that {@link ServiceLoader} instances are not safe for use by multiple
 * concurrent threads, so iterating over this shared instance needs external
 * synchronization.
 * </p>
 */
final class SchemaFormatterCache {
  static final ServiceLoader<SchemaFormatterFactory> LOADER = ServiceLoader.load(SchemaFormatterFactory.class);

  private SchemaFormatterCache() {
    // Constant holder: not meant to be instantiated.
  }
}

0 comments on commit 362aef8

Please sign in to comment.