Skip to content

Commit

Permalink
[CARBONDATA-2608] SDK Support JSON data loading directly (without AVR…
Browse files Browse the repository at this point in the history
…O conversion)

What changes were proposed in this pull request?
Currently, the SDK supports JSON data loading only via AVRO.
So, converting JSON to an Avro record and then Avro to a carbon object is a
two-step process. Hence there is a need for a new carbonWriter that works
with JSON directly, without AVRO.
This PR implements that.

Highlights:
 # Works with just the JSON data and the carbon schema.
 # Implements Hadoop's FileInputFormat to create JsonInputFormat.
 # Supports reading multiple JSON files in a folder.
 # Supports reading multiline JSON data with a record identifier.
 # Supports single-row JSON read and write.
 # Handles bad records when loading JSON data.

How was this patch tested?

Tested manually; unit tests (UTs) are also added in this PR.
  • Loading branch information
ajantha-bhat committed Jun 24, 2018
1 parent d5e86db commit 61deac4
Show file tree
Hide file tree
Showing 28 changed files with 2,041 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3117,4 +3117,3 @@ public static void setLocalDictColumnsToWrapperSchema(List<ColumnSchema> columns
}
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"StructColumn":{
"stringField": "bob",
"intField": 10,
"longField": 12345678,
"doubleField": 123400.78,
"boolField": true,
"FloorNum": [ 1, 2],
"FloorString": [ "abc", "def"],
"FloorLong": [ 1234567, 2345678],
"FloorDouble": [ 1.0, 2.0, 33.33],
"FloorBool": [ true, false, false, true]
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"stringField": "ajantha",
"intField": 26,
"shortField": 26,
"longField": 1234567,
"doubleField": 23.3333,
"boolField": false,
"dateField": "2019-03-02",
"timeField": "2019-02-12 03:03:34",
"decimalField" : 55.35
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"stringField": 123,
"intField": "string",
"shortField": 1234567,
"longField": 23.5,
"doubleField": "string",
"boolField": 10,
"dateField": 12345,
"timeField": 12345,
"decimalField" : "String"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"name": "bob",
"age": 10,
"doorNum": [
{
"street": "abc",
"city": "city1",
"FloorNum": {"wing" : "a", "number" : 1}
},
{
"street": "def",
"city": "city2",
"FloorNum": {"wing" : "b", "number" : 0}
},
{
"street": "ghi",
"city": "city3",
"FloorNum": {"wing" : "a", "number" : 2}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"name": "ajantha",
"age": 26,
"BuildNum": [
[
[
{"street":"abc", "city":"city1"},
{"street":"def", "city":"city2"},
{"street":"cfg", "city":"city3"}
],
[
{"street":"abc1", "city":"city3"},
{"street":"def1", "city":"city4"},
{"street":"cfg1", "city":"city5"}
]
],
[
[
{"street":"abc2", "city":"cityx"},
{"street":"abc3", "city":"cityy"},
{"street":"abc4", "city":"cityz"}
],
[
{"street":"a1bc", "city":"cityA"},
{"street":"a1bc", "city":"cityB"},
{"street":"a1bc", "city":"cityc"}
]
]
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"stringField": "kkkk","intField": 26,"shortField": 26,"longField": 1234567,"doubleField": 23.3333,"boolField": false,"dateField": "2019-03-02","timeField": "2019-02-12 03:03:34","decimalField" : 55.35}
{"stringField": "bbbb","intField": 26,"shortField": 26,"longField": 1234567,"doubleField": 23.3333,"boolField": false,"dateField": "2019-03-02","timeField": "2019-02-12 03:03:34","decimalField" : 55.35}
{"stringField": "cccc","intField": 26,"shortField": 26,"longField": 1234567,"doubleField": 23.3333,"boolField": false,"dateField": "2019-03-02","timeField": "2019-02-12 03:03:34","decimalField" : 55.35}
{"stringField": "dddd","intField": 26,"shortField": 26,"longField": 1234567,"doubleField": 23.3333,"boolField": false,"dateField": "2019-03-02","timeField": "2019-02-12 03:03:34","decimalField" : 55.35}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"stringField": "kkkk","intField": 26,"shortField": 26,"longField": 1234567,"doubleField": 23.3333,"boolField": false,"dateField": "2019-03-02","timeField": "2019-02-12 03:03:34","decimalField" : 55.35}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
[
{
"jsonData": {
"stringField": "ajantha",
"intField": 26,
"shortField": 26,
"longField": 1234567,
"doubleField": 23.3333,
"boolField": false,
"dateField": "2019-03-02",
"timeField": "2019-02-12 03:03:34",
"decimalField": 55.35
}
},
{
"jsonData": {
"stringField": "ab",
"intField": 25,
"shortField": 25,
"longField": 1234567,
"doubleField": 23.3333,
"boolField": false,
"dateField": "2018-03-02",
"timeField": "2018-02-12 03:03:34",
"decimalField": 55.35
}
},
{
"jsonData": {
"stringField": "cd",
"intField": 24,
"shortField": 24,
"longField": 1234567,
"doubleField": 23.3333,
"boolField": false,
"dateField": "2017-03-02",
"timeField": "2017-02-12 03:03:34",
"decimalField": 55.35
}
},
{
"jsonData": {
"stringField": "ef",
"intField": 23,
"shortField": 23,
"longField": 1234567,
"doubleField": 23.3333,
"boolField": false,
"dateField": "2016-03-02",
"timeField": "2016-02-12 03:03:34",
"decimalField": 55.35
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{"jsonData":{
"stringField": "ajantha",
"intField": 26,
"shortField": 26,
"longField": 1234567,
"doubleField": 23.3333,
"boolField": false,
"dateField": "2019-03-02",
"timeField": "2019-02-12 03:03:34",
"decimalField": 55.35
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"jsonField":{"stringField": "kkkk","intField": 26,"shortField": 26,"longField": 1234567,"doubleField": 23.3333,"boolField": false,"dateField": "2019-03-02","timeField": "2019-02-12 03:03:34","decimalField" : 55.35}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
[
{
"stringField": "ajantha",
"intField": 26,
"shortField": 26,
"longField": 1234567,
"doubleField": 23.3333,
"boolField": false,
"dateField": "2019-03-02",
"timeField": "2019-02-12 03:03:34",
"decimalField": 55.35
},
{
"stringField": "ab",
"intField": 25,
"shortField": 25,
"longField": 1234567,
"doubleField": 23.3333,
"boolField": false,
"dateField": "2018-03-02",
"timeField": "2018-02-12 03:03:34",
"decimalField": 55.35
},
{
"stringField": "cd",
"intField": 24,
"shortField": 24,
"longField": 1234567,
"doubleField": 23.3333,
"boolField": false,
"dateField": "2017-03-02",
"timeField": "2017-02-12 03:03:34",
"decimalField": 55.35
},
{
"stringField": "ef",
"intField": 23,
"shortField": 23,
"longField": 1234567,
"doubleField": 23.3333,
"boolField": false,
"dateField": "2016-03-02",
"timeField": "2016-02-12 03:03:34",
"decimalField": 55.35
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[
{
"stringField": "ZZ",
"intField": 100,
"shortField": 100,
"longField": 1234567,
"doubleField": 23.3333,
"boolField": false,
"dateField": "2020-03-02",
"timeField": "2020-02-12 03:03:34",
"decimalField": 55.35
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"name": "address",
"type": "record",
"fields": [
{
"name": "StructColumn",
"type": {
"type": "record",
"name": "my_struct",
"fields": [
{
"name": "stringField",
"type": "string"
},
{
"name": "intField",
"type": "int"
},
{
"name": "longField",
"type": "long"
},
{
"name": "doubleField",
"type": "double"
},
{
"name": "boolField",
"type": "boolean"
},
{
"name": "FloorNum",
"type": {
"type": "array",
"items": {
"name": "IntegerE",
"type": "int"
}
}
},
{
"name": "FloorString",
"type": {
"type": "array",
"items": {
"name": "StringE",
"type": "string"
}
}
},
{
"name": "FloorLong",
"type": {
"type": "array",
"items": {
"name": "longFieldE",
"type": "long"
}
}
},
{
"name": "FloorDouble",
"type": {
"type": "array",
"items": {
"name": "doubleFieldE",
"type": "double"
}
}
},
{
"name": "FloorBool",
"type": {
"type": "array",
"items": {
"name": "boolFieldE",
"type": "boolean"
}
}
}
]
}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"name": "address",
"type": "record",
"fields": [
{
"name": "name",
"type": "string"
},
{
"name": "age",
"type": "int"
},
{
"name": "doorNum",
"type": {
"type": "array",
"items": {
"type": "record",
"name": "my_address",
"fields": [
{
"name": "street",
"type": "string"
},
{
"name": "city",
"type": "string"
},
{
"name": "FloorNum",
"type": {
"type": "record",
"name": "Floor",
"fields": [
{
"name": "wing",
"type": "string"
},
{
"name": "number",
"type": "int"
}
]
}
}
]
}
}
}
]
}

0 comments on commit 61deac4

Please sign in to comment.