-
Notifications
You must be signed in to change notification settings - Fork 703
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CARBONDATA-2608] SDK Support JSON data loading directly (without AVRO conversion)
What changes were proposed in this pull request? Currently, the SDK supports JSON data loading only through AVRO, so converting JSON to an Avro record and then Avro to a carbon object is a two-step process. Hence there is a need for a new carbonWriter that works with JSON without AVRO. This PR implements that. Highlights: # Works with just the JSON data and carbon schema. # Implements Hadoop's FileInputFormat to create JsonInputFormat. # Supports reading multiple JSON files in a folder. # Supports reading multiline JSON data with a record identifier. # Supports single-row JSON read and write. # Handles bad records when loading JSON data. How was this patch tested? Manual testing, and UTs are added in this PR.
- Loading branch information
1 parent
d5e86db
commit 61deac4
Showing
28 changed files
with
2,041 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3117,4 +3117,3 @@ public static void setLocalDictColumnsToWrapperSchema(List<ColumnSchema> columns | |
} | ||
} | ||
} | ||
|
15 changes: 15 additions & 0 deletions
15
integration/spark-common-test/src/test/resources/jsonFiles/data/StructOfAllTypes.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"StructColumn":{ | ||
"stringField": "bob", | ||
"intField": 10, | ||
"longField": 12345678, | ||
"doubleField": 123400.78, | ||
"boolField": true, | ||
"FloorNum": [ 1, 2], | ||
"FloorString": [ "abc", "def"], | ||
"FloorLong": [ 1234567, 2345678], | ||
"FloorDouble": [ 1.0, 2.0, 33.33], | ||
"FloorBool": [ true, false, false, true] | ||
} | ||
} | ||
|
11 changes: 11 additions & 0 deletions
11
integration/spark-common-test/src/test/resources/jsonFiles/data/allPrimitiveType.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"stringField": "ajantha", | ||
"intField": 26, | ||
"shortField": 26, | ||
"longField": 1234567, | ||
"doubleField": 23.3333, | ||
"boolField": false, | ||
"dateField": "2019-03-02", | ||
"timeField": "2019-02-12 03:03:34", | ||
"decimalField" : 55.35 | ||
} |
11 changes: 11 additions & 0 deletions
11
...ration/spark-common-test/src/test/resources/jsonFiles/data/allPrimitiveTypeBadRecord.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"stringField": 123, | ||
"intField": "string", | ||
"shortField": 1234567, | ||
"longField": 23.5, | ||
"doubleField": "string", | ||
"boolField": 10, | ||
"dateField": 12345, | ||
"timeField": 12345, | ||
"decimalField" : "String" | ||
} |
21 changes: 21 additions & 0 deletions
21
integration/spark-common-test/src/test/resources/jsonFiles/data/arrayOfStructOfStruct.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
{ | ||
"name": "bob", | ||
"age": 10, | ||
"doorNum": [ | ||
{ | ||
"street": "abc", | ||
"city": "city1", | ||
"FloorNum": {"wing" : "a", "number" : 1} | ||
}, | ||
{ | ||
"street": "def", | ||
"city": "city2", | ||
"FloorNum": {"wing" : "b", "number" : 0} | ||
}, | ||
{ | ||
"street": "ghi", | ||
"city": "city3", | ||
"FloorNum": {"wing" : "a", "number" : 2} | ||
} | ||
] | ||
} |
30 changes: 30 additions & 0 deletions
30
...tion/spark-common-test/src/test/resources/jsonFiles/data/arrayOfarrayOfarrayOfStruct.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
{ | ||
"name": "ajantha", | ||
"age": 26, | ||
"BuildNum": [ | ||
[ | ||
[ | ||
{"street":"abc", "city":"city1"}, | ||
{"street":"def", "city":"city2"}, | ||
{"street":"cfg", "city":"city3"} | ||
], | ||
[ | ||
{"street":"abc1", "city":"city3"}, | ||
{"street":"def1", "city":"city4"}, | ||
{"street":"cfg1", "city":"city5"} | ||
] | ||
], | ||
[ | ||
[ | ||
{"street":"abc2", "city":"cityx"}, | ||
{"street":"abc3", "city":"cityy"}, | ||
{"street":"abc4", "city":"cityz"} | ||
], | ||
[ | ||
{"street":"a1bc", "city":"cityA"}, | ||
{"street":"a1bc", "city":"cityB"}, | ||
{"street":"a1bc", "city":"cityc"} | ||
] | ||
] | ||
] | ||
} |
4 changes: 4 additions & 0 deletions
4
...resources/jsonFiles/data/similarSchemaFiles/JsonReaderTest/MultipleRowSingleLineJson.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
{"stringField": "kkkk","intField": 26,"shortField": 26,"longField": 1234567,"doubleField": 23.3333,"boolField": false,"dateField": "2019-03-02","timeField": "2019-02-12 03:03:34","decimalField" : 55.35} | ||
{"stringField": "bbbb","intField": 26,"shortField": 26,"longField": 1234567,"doubleField": 23.3333,"boolField": false,"dateField": "2019-03-02","timeField": "2019-02-12 03:03:34","decimalField" : 55.35} | ||
{"stringField": "cccc","intField": 26,"shortField": 26,"longField": 1234567,"doubleField": 23.3333,"boolField": false,"dateField": "2019-03-02","timeField": "2019-02-12 03:03:34","decimalField" : 55.35} | ||
{"stringField": "dddd","intField": 26,"shortField": 26,"longField": 1234567,"doubleField": 23.3333,"boolField": false,"dateField": "2019-03-02","timeField": "2019-02-12 03:03:34","decimalField" : 55.35} |
1 change: 1 addition & 0 deletions
1
...t/resources/jsonFiles/data/similarSchemaFiles/JsonReaderTest/SingleRowSingleLineJson.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"stringField": "kkkk","intField": 26,"shortField": 26,"longField": 1234567,"doubleField": 23.3333,"boolField": false,"dateField": "2019-03-02","timeField": "2019-02-12 03:03:34","decimalField" : 55.35} |
54 changes: 54 additions & 0 deletions
54
.../JsonReaderTest/withRecordIdentifier/MultipleRowMultipleLineJsonWithRecordIdentifier.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
[ | ||
{ | ||
"jsonData": { | ||
"stringField": "ajantha", | ||
"intField": 26, | ||
"shortField": 26, | ||
"longField": 1234567, | ||
"doubleField": 23.3333, | ||
"boolField": false, | ||
"dateField": "2019-03-02", | ||
"timeField": "2019-02-12 03:03:34", | ||
"decimalField": 55.35 | ||
} | ||
}, | ||
{ | ||
"jsonData": { | ||
"stringField": "ab", | ||
"intField": 25, | ||
"shortField": 25, | ||
"longField": 1234567, | ||
"doubleField": 23.3333, | ||
"boolField": false, | ||
"dateField": "2018-03-02", | ||
"timeField": "2018-02-12 03:03:34", | ||
"decimalField": 55.35 | ||
} | ||
}, | ||
{ | ||
"jsonData": { | ||
"stringField": "cd", | ||
"intField": 24, | ||
"shortField": 24, | ||
"longField": 1234567, | ||
"doubleField": 23.3333, | ||
"boolField": false, | ||
"dateField": "2017-03-02", | ||
"timeField": "2017-02-12 03:03:34", | ||
"decimalField": 55.35 | ||
} | ||
}, | ||
{ | ||
"jsonData": { | ||
"stringField": "ef", | ||
"intField": 23, | ||
"shortField": 23, | ||
"longField": 1234567, | ||
"doubleField": 23.3333, | ||
"boolField": false, | ||
"dateField": "2016-03-02", | ||
"timeField": "2016-02-12 03:03:34", | ||
"decimalField": 55.35 | ||
} | ||
} | ||
] |
12 changes: 12 additions & 0 deletions
12
...es/JsonReaderTest/withRecordIdentifier/SingleRowMultipleLineJsonWithRecordIdentifier.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
{"jsonData":{ | ||
"stringField": "ajantha", | ||
"intField": 26, | ||
"shortField": 26, | ||
"longField": 1234567, | ||
"doubleField": 23.3333, | ||
"boolField": false, | ||
"dateField": "2019-03-02", | ||
"timeField": "2019-02-12 03:03:34", | ||
"decimalField": 55.35 | ||
} | ||
} |
1 change: 1 addition & 0 deletions
1
...iles/JsonReaderTest/withRecordIdentifier/SingleRowSingleLineJsonWithRecordIdentifier.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"jsonField":{"stringField": "kkkk","intField": 26,"shortField": 26,"longField": 1234567,"doubleField": 23.3333,"boolField": false,"dateField": "2019-03-02","timeField": "2019-02-12 03:03:34","decimalField" : 55.35}} |
46 changes: 46 additions & 0 deletions
46
...st/src/test/resources/jsonFiles/data/similarSchemaFiles/allPrimitiveTypeMultipleRows.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
[ | ||
{ | ||
"stringField": "ajantha", | ||
"intField": 26, | ||
"shortField": 26, | ||
"longField": 1234567, | ||
"doubleField": 23.3333, | ||
"boolField": false, | ||
"dateField": "2019-03-02", | ||
"timeField": "2019-02-12 03:03:34", | ||
"decimalField": 55.35 | ||
}, | ||
{ | ||
"stringField": "ab", | ||
"intField": 25, | ||
"shortField": 25, | ||
"longField": 1234567, | ||
"doubleField": 23.3333, | ||
"boolField": false, | ||
"dateField": "2018-03-02", | ||
"timeField": "2018-02-12 03:03:34", | ||
"decimalField": 55.35 | ||
}, | ||
{ | ||
"stringField": "cd", | ||
"intField": 24, | ||
"shortField": 24, | ||
"longField": 1234567, | ||
"doubleField": 23.3333, | ||
"boolField": false, | ||
"dateField": "2017-03-02", | ||
"timeField": "2017-02-12 03:03:34", | ||
"decimalField": 55.35 | ||
}, | ||
{ | ||
"stringField": "ef", | ||
"intField": 23, | ||
"shortField": 23, | ||
"longField": 1234567, | ||
"doubleField": 23.3333, | ||
"boolField": false, | ||
"dateField": "2016-03-02", | ||
"timeField": "2016-02-12 03:03:34", | ||
"decimalField": 55.35 | ||
} | ||
] |
13 changes: 13 additions & 0 deletions
13
...est/src/test/resources/jsonFiles/data/similarSchemaFiles/allPrimitiveTypeSingleArray.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[ | ||
{ | ||
"stringField": "ZZ", | ||
"intField": 100, | ||
"shortField": 100, | ||
"longField": 1234567, | ||
"doubleField": 23.3333, | ||
"boolField": false, | ||
"dateField": "2020-03-02", | ||
"timeField": "2020-02-12 03:03:34", | ||
"decimalField": 55.35 | ||
} | ||
] |
85 changes: 85 additions & 0 deletions
85
integration/spark-common-test/src/test/resources/jsonFiles/schema/StructOfAllTypes.avsc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
{ | ||
"name": "address", | ||
"type": "record", | ||
"fields": [ | ||
{ | ||
"name": "StructColumn", | ||
"type": { | ||
"type": "record", | ||
"name": "my_struct", | ||
"fields": [ | ||
{ | ||
"name": "stringField", | ||
"type": "string" | ||
}, | ||
{ | ||
"name": "intField", | ||
"type": "int" | ||
}, | ||
{ | ||
"name": "longField", | ||
"type": "long" | ||
}, | ||
{ | ||
"name": "doubleField", | ||
"type": "double" | ||
}, | ||
{ | ||
"name": "boolField", | ||
"type": "boolean" | ||
}, | ||
{ | ||
"name": "FloorNum", | ||
"type": { | ||
"type": "array", | ||
"items": { | ||
"name": "IntegerE", | ||
"type": "int" | ||
} | ||
} | ||
}, | ||
{ | ||
"name": "FloorString", | ||
"type": { | ||
"type": "array", | ||
"items": { | ||
"name": "StringE", | ||
"type": "string" | ||
} | ||
} | ||
}, | ||
{ | ||
"name": "FloorLong", | ||
"type": { | ||
"type": "array", | ||
"items": { | ||
"name": "longFieldE", | ||
"type": "long" | ||
} | ||
} | ||
}, | ||
{ | ||
"name": "FloorDouble", | ||
"type": { | ||
"type": "array", | ||
"items": { | ||
"name": "doubleFieldE", | ||
"type": "double" | ||
} | ||
} | ||
}, | ||
{ | ||
"name": "FloorBool", | ||
"type": { | ||
"type": "array", | ||
"items": { | ||
"name": "boolFieldE", | ||
"type": "boolean" | ||
} | ||
} | ||
} | ||
] | ||
} | ||
} | ||
] | ||
} |
51 changes: 51 additions & 0 deletions
51
integration/spark-common-test/src/test/resources/jsonFiles/schema/arrayOfStructOfStruct.avsc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
{ | ||
"name": "address", | ||
"type": "record", | ||
"fields": [ | ||
{ | ||
"name": "name", | ||
"type": "string" | ||
}, | ||
{ | ||
"name": "age", | ||
"type": "int" | ||
}, | ||
{ | ||
"name": "doorNum", | ||
"type": { | ||
"type": "array", | ||
"items": { | ||
"type": "record", | ||
"name": "my_address", | ||
"fields": [ | ||
{ | ||
"name": "street", | ||
"type": "string" | ||
}, | ||
{ | ||
"name": "city", | ||
"type": "string" | ||
}, | ||
{ | ||
"name": "FloorNum", | ||
"type": { | ||
"type": "record", | ||
"name": "Floor", | ||
"fields": [ | ||
{ | ||
"name": "wing", | ||
"type": "string" | ||
}, | ||
{ | ||
"name": "number", | ||
"type": "int" | ||
} | ||
] | ||
} | ||
} | ||
] | ||
} | ||
} | ||
} | ||
] | ||
} |
Oops, something went wrong.