Skip to content

Commit

Permalink
[CARBONDATA-3301]Fix inserting null values to Array<date> columns in …
Browse files Browse the repository at this point in the history
…carbon file format data load

Problem:
When a carbon datasource table contains complex columns such as Array<date> or Array<timestamp>, and data is inserted and queried, it returns null data for those columns.

Solution:
In the file format case, before the actual load, we get the internal row object from Spark and convert the internal row to a carbondata-understandable object, so the object for a date will be of Integer type. While inserting data, only the Long case was handled, and this Integer value was passed to be parsed by SimpleDateFormat, which throws an exception, so we were inserting null. This is now handled for Integer: in this case, the surrogate key is directly assigned this value.

This closes #3133
  • Loading branch information
akashrn5 authored and ravipesala committed Mar 8, 2019
1 parent e443a94 commit a5fc19d
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
Expand Up @@ -1760,6 +1760,16 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll {
spark.sql("drop table if exists fileformat_drop_hive")
}

test("test complexdatype for date and timestamp datatype") {
spark.sql("drop table if exists fileformat_date")
spark.sql("drop table if exists fileformat_date_hive")
spark.sql("create table fileformat_date_hive(name string, age int, dob array<date>, joinTime array<timestamp>) using parquet")
spark.sql("create table fileformat_date(name string, age int, dob array<date>, joinTime array<timestamp>) using carbon")
spark.sql("insert into fileformat_date_hive select 'joey', 32, array('1994-04-06','1887-05-06'), array('1994-04-06 00:00:05','1887-05-06 00:00:08')")
spark.sql("insert into fileformat_date select 'joey', 32, array('1994-04-06','1887-05-06'), array('1994-04-06 00:00:05','1887-05-06 00:00:08')")
checkAnswer(spark.sql("select * from fileformat_date_hive"), spark.sql("select * from fileformat_date"))
}

test("validate the columns not present in schema") {
spark.sql("drop table if exists validate")
spark.sql("create table validate (name string, age int, address string) using carbon options('inverted_index'='abc')")
Expand All @@ -1785,5 +1795,6 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll {
spark.sql("drop table if exists par_table")
spark.sql("drop table if exists sdkout")
spark.sql("drop table if exists validate")
spark.sql("drop table if exists fileformat_date")
}
}
Expand Up @@ -344,7 +344,7 @@ public int getSurrogateIndex() {
byte[] value = null;
if (isDirectDictionary) {
int surrogateKey;
if (!(input instanceof Long)) {
if (!(input instanceof Long) && !(input instanceof Integer)) {
SimpleDateFormat parser = new SimpleDateFormat(getDateFormat(carbonDimension));
parser.parse(parsedValue);
}
Expand All @@ -353,6 +353,11 @@ public int getSurrogateIndex() {
// using dictionaryGenerator.
if (dictionaryGenerator instanceof DirectDictionary && input instanceof Long) {
surrogateKey = ((DirectDictionary) dictionaryGenerator).generateKey((long) input);
} else if (dictionaryGenerator instanceof DirectDictionary
&& input instanceof Integer) {
// In case of file format, for complex type date or time type, input data comes as a
// Integer object, so just assign the surrogate key with the input object value
surrogateKey = (int) input;
} else {
surrogateKey = dictionaryGenerator.getOrGenerateKey(parsedValue);
}
Expand Down

0 comments on commit a5fc19d

Please sign in to comment.