From a2718fed6f8e32073ae71292b49d843adb65643d Mon Sep 17 00:00:00 2001 From: Simon Woodford Date: Mon, 23 May 2016 23:22:33 +0000 Subject: [PATCH 1/6] AVRO-1849 Fix the issue where converting the schema of a record with no fields produced an invalid JSON --- lang/c++/impl/NodeImpl.cc | 11 +++--- lang/c++/test/SchemaTests.cc | 68 ++++++++++++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/lang/c++/impl/NodeImpl.cc b/lang/c++/impl/NodeImpl.cc index aba2a7366d3..606cd2093ed 100644 --- a/lang/c++/impl/NodeImpl.cc +++ b/lang/c++/impl/NodeImpl.cc @@ -189,23 +189,22 @@ NodeRecord::printJson(std::ostream &os, int depth) const os << "{\n"; os << indent(++depth) << "\"type\": \"record\",\n"; printName(os, nameAttribute_.get(), depth); - os << indent(depth) << "\"fields\": [\n"; + os << indent(depth) << "\"fields\": ["; int fields = leafAttributes_.size(); ++depth; for(int i = 0; i < fields; ++i) { if(i > 0) { - os << indent(depth) << "},\n"; + os << ','; } - os << indent(depth) << "{\n"; + os << '\n' << indent(depth) << "{\n"; os << indent(++depth) << "\"name\": \"" << leafNameAttributes_.get(i) << "\",\n"; os << indent(depth) << "\"type\": "; leafAttributes_.get(i)->printJson(os, depth); os << '\n'; - --depth; + os << indent(--depth) << '}'; } - os << indent(depth) << "}\n"; - os << indent(--depth) << "]\n"; + os << '\n' << indent(--depth) << "]\n"; os << indent(--depth) << '}'; } diff --git a/lang/c++/test/SchemaTests.cc b/lang/c++/test/SchemaTests.cc index 3d483b40674..a199aaba5cd 100644 --- a/lang/c++/test/SchemaTests.cc +++ b/lang/c++/test/SchemaTests.cc @@ -23,7 +23,6 @@ #include #include - namespace avro { namespace schema { @@ -48,6 +47,7 @@ const char* basicSchemas[] = { "{ \"type\": \"string\" }", // Record + "{\"type\": \"record\",\"name\": \"Test\",\"fields\": []}", "{\"type\": \"record\",\"name\": \"Test\",\"fields\": " "[{\"name\": \"f\",\"type\": \"long\"}]}", "{\"type\": \"record\",\"name\": \"Test\",\"fields\": " @@ -136,6 +136,57 @@ const char* basicSchemaErrors[] = { "{\"type\": \"fixed\", \"size\": 314}", }; +const char* roundTripSchemas[] = { + "\"null\"", + "\"boolean\"", + "\"int\"", + "\"long\"", + "\"float\"", + "\"double\"", + "\"bytes\"", + "\"string\"", + // Record + "{\"type\":\"record\",\"name\":\"Test\",\"fields\":[]}", + "{\"type\":\"record\",\"name\":\"Test\",\"fields\":" + "[{\"name\":\"f\",\"type\":\"long\"}]}", + "{\"type\":\"record\",\"name\":\"Test\",\"fields\":" + "[{\"name\":\"f1\",\"type\":\"long\"}," + "{\"name\":\"f2\",\"type\":\"int\"}]}", +/* Avro-C++ cannot do a round-trip on error schemas. + * "{\"type\":\"error\",\"name\":\"Test\",\"fields\":" + "[{\"name\":\"f1\",\"type\":\"long\"}," + "{\"name\":\"f2\",\"type\":\"int\"}]}" +*/ + // Recursive. + "{\"type\":\"record\",\"name\":\"LongList\"," + "\"fields\":[{\"name\":\"value\",\"type\":\"long\"}," + "{\"name\":\"next\",\"type\":[\"LongList\",\"null\"]}]}", + // Enum + "{\"type\":\"enum\",\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}", + + // Array + "{\"type\":\"array\",\"items\":\"long\"}", + "{\"type\":\"array\",\"items\":{\"type\":\"enum\"," + "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}", + + // Map + "{\"type\":\"map\",\"values\":\"long\"}", + "{\"type\":\"map\",\"values\":{\"type\":\"enum\"," + "\"name\":\"Test\",\"symbols\":[\"A\",\"B\"]}}", + + // Union + "[\"string\",\"null\",\"long\"]", + + // Fixed + "{\"type\":\"fixed\",\"name\":\"Test\",\"size\":1}", + "{\"type\":\"fixed\",\"namespace\":\"org.apache.hadoop.avro\"," + "\"name\":\"MyFixed\",\"size\":1}", + "{\"type\":\"fixed\",\"name\":\"Test\",\"size\":1}", + "{\"type\":\"fixed\",\"name\":\"Test\",\"size\":1}" +}; + + + static void testBasic(const char* schema) { BOOST_CHECKPOINT(schema); @@ -154,6 +205,19 @@ static void testCompile(const char* schema) compileJsonSchemaFromString(std::string(schema)); } +// Test that the JSON output from a valid schema matches the JSON that was +// used to construct it, apart from whitespace changes. +static void testRoundTrip(const char* schema) +{ + BOOST_CHECKPOINT(schema); + avro::ValidSchema compiledSchema = compileJsonSchemaFromString(std::string(schema)); + std::ostringstream os; + compiledSchema.toJson(os); + std::string result = os.str(); + result.erase( std::remove_if( result.begin(), result.end(), ::isspace ), result.end() ); // Remove whitespace + BOOST_CHECK(result == std::string(schema)); +} + } } @@ -173,6 +237,6 @@ init_unit_test_suite(int argc, char* argv[]) ADD_PARAM_TEST(ts, avro::schema::testBasic_fail, avro::schema::basicSchemaErrors); ADD_PARAM_TEST(ts, avro::schema::testCompile, avro::schema::basicSchemas); - + ADD_PARAM_TEST(ts, avro::schema::testRoundTrip, avro::schema::roundTripSchemas); return ts; } From 39c5195c4ac0d404296e88aa7beda91308405cd1 Mon Sep 17 00:00:00 2001 From: srw Date: Thu, 22 Sep 2016 23:38:43 +0100 Subject: [PATCH 2/6] Fix style issues in the code. --- lang/c++/test/SchemaTests.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lang/c++/test/SchemaTests.cc b/lang/c++/test/SchemaTests.cc index f56e780067b..e9d9aae5492 100644 --- a/lang/c++/test/SchemaTests.cc +++ b/lang/c++/test/SchemaTests.cc @@ -154,9 +154,9 @@ const char* roundTripSchemas[] = { "{\"name\":\"f2\",\"type\":\"int\"}]}", /* Avro-C++ cannot do a round-trip on error schemas. * "{\"type\":\"error\",\"name\":\"Test\",\"fields\":" - "[{\"name\":\"f1\",\"type\":\"long\"}," - "{\"name\":\"f2\",\"type\":\"int\"}]}" -*/ + * "[{\"name\":\"f1\",\"type\":\"long\"}," + * "{\"name\":\"f2\",\"type\":\"int\"}]}" + */ // Recursive. "{\"type\":\"record\",\"name\":\"LongList\"," "\"fields\":[{\"name\":\"value\",\"type\":\"long\"}," @@ -214,7 +214,7 @@ static void testRoundTrip(const char* schema) std::ostringstream os; compiledSchema.toJson(os); std::string result = os.str(); - result.erase( std::remove_if( result.begin(), result.end(), ::isspace ), result.end() ); // Remove whitespace + result.erase(std::remove_if(result.begin(), result.end(), ::isspace), result.end()); // Remove whitespace BOOST_CHECK(result == std::string(schema)); } From e80fd1aa4c9cc8b5cccbb35a33903d6988109172 Mon Sep 17 00:00:00 2001 From: srw Date: Sat, 24 Sep 2016 01:47:55 +0100 Subject: [PATCH 3/6] Fix the build scripts; build.sh requires the :z addition to work on SELinux (see Jira 1925). lang/c++/build.sh refers to the lang/c++/build directory which is empty. These are now fixed. --- build.sh | 6 +++--- lang/c++/build.sh | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/build.sh b/build.sh index c0c31df5385..13192752ebe 100755 --- a/build.sh +++ b/build.sh @@ -223,10 +223,10 @@ UserSpecificDocker # system. And this also is a significant speedup in subsequent # builds because the dependencies are downloaded only once. docker run --rm=true -t -i \ - -v ${PWD}:/home/${USER_NAME}/avro \ + -v ${PWD}:/home/${USER_NAME}/avro:z \ -w /home/${USER_NAME}/avro \ - -v ${HOME}/.m2:/home/${USER_NAME}/.m2 \ - -v ${HOME}/.gnupg:/home/${USER_NAME}/.gnupg \ + -v ${HOME}/.m2:/home/${USER_NAME}/.m2:z \ + -v ${HOME}/.gnupg:/home/${USER_NAME}/.gnupg:z \ -u ${USER_NAME} \ avro-build-${USER_NAME} ;; diff --git a/lang/c++/build.sh b/lang/c++/build.sh index 2c988856e04..ac3ab53def7 100755 --- a/lang/c++/build.sh +++ b/lang/c++/build.sh @@ -76,14 +76,14 @@ function do_dist() { case "$target" in test) - (cd build && make && cd .. \ - && ./build/buffertest \ - && ./build/unittest \ - && ./build/CodecTests \ - && ./build/StreamTests \ - && ./build/SpecificTests \ - && ./build/AvrogencppTests \ - && ./build/DataFileTests) + ( make \ + && ./buffertest \ + && ./unittest \ + && ./CodecTests \ + && ./StreamTests \ + && ./SpecificTests \ + && ./AvrogencppTests \ + && ./DataFileTests) ;; dist) From f7826c623a73c8daccdaae618f8eeb58cd601dd7 Mon Sep 17 00:00:00 2001 From: srw Date: Sat, 24 Sep 2016 01:54:57 +0100 Subject: [PATCH 4/6] Update to use BOOST_TEST_CHECKPOINT --- lang/c++/test/SchemaTests.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lang/c++/test/SchemaTests.cc b/lang/c++/test/SchemaTests.cc index e9d9aae5492..8ecde7a6e08 100644 --- a/lang/c++/test/SchemaTests.cc +++ b/lang/c++/test/SchemaTests.cc @@ -209,7 +209,7 @@ static void testCompile(const char* schema) // used to construct it, apart from whitespace changes. static void testRoundTrip(const char* schema) { - BOOST_CHECKPOINT(schema); + BOOST_TEST_CHECKPOINT(schema); avro::ValidSchema compiledSchema = compileJsonSchemaFromString(std::string(schema)); std::ostringstream os; compiledSchema.toJson(os); From 3ad5aaca13a0b82b42090139b7f1369e0e65427d Mon Sep 17 00:00:00 2001 From: srw Date: Mon, 26 Sep 2016 23:16:44 +0100 Subject: [PATCH 5/6] AVRO-1926 Revert changes to the lang/c++/build.sh script and add the SchemaTests to the list of tests. Also revert SELinux changes to build.sh as these should be committed separately --- build.sh | 6 +++--- lang/c++/build.sh | 17 +++++++++-------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/build.sh b/build.sh index 13192752ebe..c0c31df5385 100755 --- a/build.sh +++ b/build.sh @@ -223,10 +223,10 @@ UserSpecificDocker # system. And this also is a significant speedup in subsequent # builds because the dependencies are downloaded only once. docker run --rm=true -t -i \ - -v ${PWD}:/home/${USER_NAME}/avro:z \ + -v ${PWD}:/home/${USER_NAME}/avro \ -w /home/${USER_NAME}/avro \ - -v ${HOME}/.m2:/home/${USER_NAME}/.m2:z \ - -v ${HOME}/.gnupg:/home/${USER_NAME}/.gnupg:z \ + -v ${HOME}/.m2:/home/${USER_NAME}/.m2 \ + -v ${HOME}/.gnupg:/home/${USER_NAME}/.gnupg \ -u ${USER_NAME} \ avro-build-${USER_NAME} ;; diff --git a/lang/c++/build.sh b/lang/c++/build.sh index ac3ab53def7..c8281b4face 100755 --- a/lang/c++/build.sh +++ b/lang/c++/build.sh @@ -76,14 +76,15 @@ function do_dist() { case "$target" in test) - ( make \ - && ./buffertest \ - && ./unittest \ - && ./CodecTests \ - && ./StreamTests \ - && ./SpecificTests \ - && ./AvrogencppTests \ - && ./DataFileTests) + (cd build && make && cd .. \ + && ./build/buffertest \ + && ./build/unittest \ + && ./build/CodecTests \ + && ./build/StreamTests \ + && ./build/SpecificTests \ + && ./build/AvrogencppTests \ + && ./build/DataFileTests \ + && ./build/SchemaTests) ;; dist) From ddf292c5e128d15d0fd480861e1b0253a8db424f Mon Sep 17 00:00:00 2001 From: srw Date: Fri, 11 Nov 2016 22:59:04 +0000 Subject: [PATCH 6/6] AVRO-1363 Fix the C# parsing of a union schema that contains duplicate names but in different namespaces. The full name is now used when parsing the schema --- lang/csharp/src/apache/main/Schema/NamedSchema.cs | 2 +- lang/csharp/src/apache/main/Schema/Schema.cs | 12 ++++++++++-- lang/csharp/src/apache/main/Schema/UnionSchema.cs | 2 +- lang/csharp/src/apache/test/Schema/SchemaTests.cs | 3 ++- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/lang/csharp/src/apache/main/Schema/NamedSchema.cs b/lang/csharp/src/apache/main/Schema/NamedSchema.cs index 02beb798400..b75483f8461 100644 --- a/lang/csharp/src/apache/main/Schema/NamedSchema.cs +++ b/lang/csharp/src/apache/main/Schema/NamedSchema.cs @@ -52,7 +52,7 @@ public string Namespace /// /// Namespace.Name of the schema /// - public string Fullname + public override string Fullname { get { return SchemaName.Fullname; } } diff --git a/lang/csharp/src/apache/main/Schema/Schema.cs b/lang/csharp/src/apache/main/Schema/Schema.cs index fc0e237924d..d2d2f079c6f 100644 --- a/lang/csharp/src/apache/main/Schema/Schema.cs +++ b/lang/csharp/src/apache/main/Schema/Schema.cs @@ -70,12 +70,20 @@ protected Schema(Type type, PropertyMap props) this.Props = props; } + /// + /// If this is a record, enum or fixed, returns its name, otherwise the name the primitive type. + /// + public abstract string Name { get; } + /// /// The name of this schema. If this is a named schema such as an enum, it returns the fully qualified /// name for the schema. For other schemas, it returns the type of the schema. /// - public abstract string Name { get; } - + public virtual string Fullname + { + get { return Name; } + } + /// /// Static class to return new instance of schema object /// diff --git a/lang/csharp/src/apache/main/Schema/UnionSchema.cs b/lang/csharp/src/apache/main/Schema/UnionSchema.cs index df2c37fcefa..bc2ab5b9bc7 100644 --- a/lang/csharp/src/apache/main/Schema/UnionSchema.cs +++ b/lang/csharp/src/apache/main/Schema/UnionSchema.cs @@ -56,7 +56,7 @@ internal static UnionSchema NewInstance(JArray jarr, PropertyMap props, SchemaNa if (null == unionType) throw new SchemaParseException("Invalid JSON in union" + jvalue.ToString()); - string name = unionType.Name; + string name = unionType.Fullname; if (uniqueSchemas.ContainsKey(name)) throw new SchemaParseException("Duplicate type in union: " + name); diff --git a/lang/csharp/src/apache/test/Schema/SchemaTests.cs b/lang/csharp/src/apache/test/Schema/SchemaTests.cs index b1248318750..885b5a40b79 100644 --- a/lang/csharp/src/apache/test/Schema/SchemaTests.cs +++ b/lang/csharp/src/apache/test/Schema/SchemaTests.cs @@ -69,7 +69,8 @@ public class SchemaTests Description = "No fields", ExpectedException = typeof(SchemaParseException))] [TestCase("{\"type\":\"record\",\"name\":\"LongList\", \"fields\": \"hi\"}", Description = "Fields not an array", ExpectedException = typeof(SchemaParseException))] - + [TestCase("[{\"type\": \"record\",\"name\": \"Test\",\"namespace\":\"ns1\",\"fields\": [{\"name\": \"f\",\"type\": \"long\"}]}," + + "{\"type\": \"record\",\"name\": \"Test\",\"namespace\":\"ns2\",\"fields\": [{\"name\": \"f\",\"type\": \"long\"}]}]")] // Enum [TestCase("{\"type\": \"enum\", \"name\": \"Test\", \"symbols\": [\"A\", \"B\"]}")] [TestCase("{\"type\": \"enum\", \"name\": \"Status\", \"symbols\": \"Normal Caution Critical\"}",