From e7de3f850f66afbb72813e542c21e45d3bf670e6 Mon Sep 17 00:00:00 2001 From: Kevin Schultz Date: Wed, 9 Dec 2015 16:01:59 -0500 Subject: [PATCH] AVRO-1493: Java: Schema fingerprints vary by locale --- CHANGES.txt | 4 +++- .../src/main/java/org/apache/avro/Schema.java | 9 ++++---- .../apache/avro/TestSchemaNormalization.java | 21 +++++++++++++++++++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 110f9c3d8a9..51c370e5cf8 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -12,6 +12,8 @@ Trunk (not yet released) BUG FIXES + AVRO-1493. Java: Avoid the "Turkish Locale Problem". Schema fingerprints are + now consistent regardless of the environment's locale. Avro 1.8.0 (22 January 2016) @@ -46,7 +48,7 @@ Avro 1.8.0 (22 January 2016) (Ryan Blue via cutting) AVRO-570. Python: Add connector for tethered mapreduce. - (Jeremy Lewi and Steven Willis via cutting) + (Jeremy Lewi and Steven Willis via cutting) AVRO-834. Java: Data File corruption recovery tool. 
(scottcarey and tomwhite) diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java b/lang/java/avro/src/main/java/org/apache/avro/Schema.java index 9a201ce45fa..600f7aaffe4 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java @@ -32,6 +32,7 @@ import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; @@ -93,7 +94,7 @@ public enum Type { RECORD, ENUM, ARRAY, MAP, UNION, FIXED, STRING, BYTES, INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL; private String name; - private Type() { this.name = this.name().toLowerCase(); } + private Type() { this.name = this.name().toLowerCase(Locale.ENGLISH); } public String getName() { return name; } }; @@ -377,7 +378,7 @@ public static class Field extends JsonProperties { public enum Order { ASCENDING, DESCENDING, IGNORE; private String name; - private Order() { this.name = this.name().toLowerCase(); } + private Order() { this.name = this.name().toLowerCase(Locale.ENGLISH); } }; private final String name; // name of the field. 
@@ -749,7 +750,7 @@ public EnumSchema(Name name, String doc, throw new SchemaParseException("Duplicate enum symbol: "+symbol); } public List getEnumSymbols() { return symbols; } - public boolean hasEnumSymbol(String symbol) { + public boolean hasEnumSymbol(String symbol) { return ordinals.containsKey(symbol); } public int getEnumOrdinal(String symbol) { return ordinals.get(symbol); } public boolean equals(Object o) { @@ -1269,7 +1270,7 @@ static Schema parse(JsonNode schema, Names names) { Field.Order order = Field.Order.ASCENDING; JsonNode orderNode = field.get("order"); if (orderNode != null) - order = Field.Order.valueOf(orderNode.getTextValue().toUpperCase()); + order = Field.Order.valueOf(orderNode.getTextValue().toUpperCase(Locale.ENGLISH)); JsonNode defaultValue = field.get("default"); if (defaultValue != null && (Type.FLOAT.equals(fieldSchema.getType()) diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java index 405d74d1f74..f8c04130d89 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaNormalization.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.ArrayList; import java.util.Formatter; +import java.util.Locale; import org.junit.Test; import org.junit.runner.RunWith; @@ -68,6 +69,26 @@ public static class TestFingerprint { } } + // see AVRO-1493 + @RunWith(Parameterized.class) + public static class TestFingerprintInternationalization { + String input, expectedOutput; + public TestFingerprintInternationalization(String i, String o) { input=i; expectedOutput=o; } + + @Parameters public static List cases() throws IOException + { return CaseFinder.find(data(),"fingerprint",new ArrayList()); } + + @Test public void testCanonicalization() throws Exception { + Locale originalDefaultLocale = Locale.getDefault(); + 
Locale.setDefault(Locale.forLanguageTag("tr")); + Schema s = Schema.parse(input); + long carefulFP = altFingerprint(SchemaNormalization.toParsingForm(s)); + assertEquals(carefulFP, Long.parseLong(expectedOutput)); + assertEqHex(carefulFP, SchemaNormalization.parsingFingerprint64(s)); + Locale.setDefault(originalDefaultLocale); + } + } + private static String DATA_FILE = (System.getProperty("share.dir", "../../../share") + "/test/data/schema-tests.txt");