Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
385 additions
and
0 deletions.
There are no files selected for viewing
385 changes: 385 additions & 0 deletions
385
...c/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,385 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.carbondata.cluster.sdv.generated | ||
|
||
import org.apache.spark.sql.Row | ||
import org.apache.spark.sql.test.util.QueryTest | ||
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} | ||
|
||
import org.apache.carbondata.core.constants.CarbonCommonConstants | ||
import org.apache.carbondata.core.util.CarbonProperties | ||
|
||
class BloomFilterDataMapTestCase extends QueryTest with BeforeAndAfterEach with BeforeAndAfterAll { | ||
|
||
override protected def beforeAll(): Unit = { | ||
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, | ||
"yyyy-MM-dd") | ||
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, | ||
"yyyy-MM-dd HH:mm:ss") | ||
|
||
} | ||
|
||
override protected def afterAll(): Unit = { | ||
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, | ||
CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT) | ||
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, | ||
CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) | ||
} | ||
|
||
private def createAllDataTypeTable(tableName: String): Unit = { | ||
sql(s"DROP TABLE IF EXISTS $tableName") | ||
sql( | ||
s""" | ||
| CREATE TABLE $tableName( | ||
| booleanField boolean, | ||
| shortField smallint, | ||
| intField int, | ||
| bigintField bigint, | ||
| doubleField double, | ||
| stringField string, | ||
| timestampField timestamp, | ||
| decimalField decimal(18,2), | ||
| dateField date, | ||
| charField string, | ||
| floatField float, | ||
| stringDictField string, | ||
| stringSortField string, | ||
| stringLocalDictField string, | ||
| longStringField string | ||
| ) | ||
| STORED BY 'carbondata' | ||
| TBLPROPERTIES( | ||
| 'LONG_STRING_COLUMNS'='longStringField', | ||
| 'SORT_COLUMNS'='stringSortField', | ||
| 'DICTIONARY_INCLUDE'='stringDictField', | ||
| 'local_dictionary_enable'='true', | ||
| 'local_dictionary_threshold'='10000', | ||
| 'local_dictionary_include'='stringLocalDictField', | ||
| 'CACHE_LEVEL'='BLOCKLET') | ||
""".stripMargin) | ||
} | ||
|
||
private def loadAllDataTypeTable(tableName: String): Unit = { | ||
sql( | ||
s""" | ||
| INSERT INTO TABLE $tableName VALUES | ||
| (true,1,11,101,41.4,'string1','2015-04-23 12:01:01',12.34,'2015-04-23','aaa',1.5,'dict1','sort1','local_dict1','longstring1'), | ||
| (false,2,12,102,42.4,'string2','2015-05-23 12:01:03',23.45,'2015-05-23','bbb',2.5,'dict2','sort2','local_dict2','longstring2'), | ||
| (true,3,13,163,43.4,'string3','2015-07-26 12:01:06',34.56,'2015-07-26','ccc',3.5,'dict3','sort3','local_dict3','longstring3'), | ||
| (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| INSERT INTO TABLE $tableName VALUES | ||
| (true,${Short.MaxValue - 2},${Int.MinValue + 2},${Long.MaxValue - 2},${Double.MinValue + 2},'string1','2015-04-23 12:01:01',${Double.MinValue + 2},'2015-04-23','aaa',${Float.MaxValue - 2},'dict1','sort1','local_dict1','longstring1'), | ||
| (false,2,12,102,42.4,'string2','2015-05-23 12:01:03',23.45,'2015-05-23','bbb',2.5,'dict2','sort2','local_dict2','longstring2'), | ||
| (true,${Short.MinValue + 2},${Int.MaxValue - 2},${Long.MinValue + 2},${Double.MaxValue - 2},'string3','2015-07-26 12:01:06',${Double.MinValue + 2},'2015-07-26','ccc',${Float.MinValue + 2},'dict3','sort3','local_dict3','longstring3'), | ||
| (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) | ||
""".stripMargin) | ||
} | ||
|
||
test("create bloomfilter datamap on all datatypes") { | ||
val tableName = "all_data_types" | ||
val dataMapName = "dm_with_all_data_types" | ||
createAllDataTypeTable(tableName) | ||
loadAllDataTypeTable(tableName) | ||
// create datamap on all supported datatype | ||
sql( | ||
s""" | ||
| CREATE DATAMAP $dataMapName ON TABLE $tableName | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='booleanField, shortField, intField, bigintField, doubleField, stringField, timestampField, decimalField, dateField, charField, floatField, stringDictField, stringSortField, stringLocalDictField, longStringField', | ||
| 'BLOOM_SIZE'='6400', | ||
| 'BLOOM_FPP'='0.001', | ||
| 'BLOOM_COMPRESS'='TRUE') | ||
""".stripMargin) | ||
loadAllDataTypeTable(tableName) | ||
checkExistence(sql(s"SHOW DATAMAP ON TABLE $tableName"), true, "bloomfilter", dataMapName) | ||
checkAnswer(sql(s"SELECT COUNT(*) FROM $tableName"), Seq(Row(16))) | ||
checkExistence( | ||
sql(s"EXPLAIN SELECT * FROM $tableName WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND doubleField=42.4 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2'"), | ||
true, "bloomfilter", dataMapName) | ||
checkAnswer( | ||
sql(s"SELECT COUNT(*) FROM (SELECT * FROM $tableName WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2') b"), | ||
Seq(Row(4))) | ||
sql(s"DROP TABLE IF EXISTS $tableName") | ||
} | ||
|
||
test("create bloomfilter datamap on all datatypes with sort columns") { | ||
val tableName = "all_data_types_with_sort_column" | ||
val dataMapName = "dm_with_all_data_types_with_sort_column" | ||
sql(s"DROP TABLE IF EXISTS $tableName") | ||
// double/float/decimal/longstring cannot be sort_columns so we ignore them in SORT_COLUMNS | ||
sql( | ||
s""" | ||
| CREATE TABLE $tableName( | ||
| booleanField boolean, | ||
| shortField smallint, | ||
| intField int, | ||
| bigintField bigint, | ||
| doubleField double, | ||
| stringField string, | ||
| timestampField timestamp, | ||
| decimalField decimal(18,2), | ||
| dateField date, | ||
| charField string, | ||
| floatField float, | ||
| stringDictField string, | ||
| stringSortField string, | ||
| stringLocalDictField string, | ||
| longStringField string | ||
| ) | ||
| STORED BY 'carbondata' | ||
| TBLPROPERTIES( | ||
| 'LONG_STRING_COLUMNS'='longStringField', | ||
| 'DICTIONARY_INCLUDE'='stringDictField', | ||
| 'local_dictionary_enable'='true', | ||
| 'local_dictionary_threshold'='10000', | ||
| 'local_dictionary_include'='stringLocalDictField', | ||
| 'SORT_COLUMNS'='booleanField, shortField, intField, bigintField, stringField, timestampField, dateField, charField, stringDictField, stringSortField, stringLocalDictField') | ||
""".stripMargin) | ||
loadAllDataTypeTable(tableName) | ||
// create datamap on all supported datatype | ||
sql( | ||
s""" | ||
| CREATE DATAMAP $dataMapName ON TABLE $tableName | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='booleanField, shortField, intField, bigintField, doubleField, stringField, timestampField, decimalField, dateField, charField, floatField, stringDictField, stringSortField, stringLocalDictField, longStringField', | ||
| 'BLOOM_SIZE'='6400', | ||
| 'BLOOM_FPP'='0.001', | ||
| 'BLOOM_COMPRESS'='TRUE') | ||
""".stripMargin) | ||
loadAllDataTypeTable(tableName) | ||
checkExistence(sql(s"SHOW DATAMAP ON TABLE $tableName"), true, "bloomfilter", dataMapName) | ||
checkAnswer(sql(s"SELECT COUNT(*) FROM $tableName"), Seq(Row(16))) | ||
checkExistence( | ||
sql(s"EXPLAIN SELECT * FROM $tableName WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND doubleField=42.4 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2'"), | ||
true, "bloomfilter", dataMapName) | ||
checkAnswer( | ||
sql(s"SELECT COUNT(*) FROM (SELECT * FROM $tableName WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2') b"), | ||
Seq(Row(4))) | ||
sql(s"DROP TABLE IF EXISTS $tableName") | ||
} | ||
|
||
test("test bloom datamap with empty values on index column") { | ||
val normalTable = "normal_table" | ||
val bloomDMSampleTable = "bloom_table" | ||
val dataMapName = "bloom_datamap" | ||
sql(s"DROP TABLE IF EXISTS $normalTable") | ||
sql(s"DROP TABLE IF EXISTS $bloomDMSampleTable") | ||
sql(s"CREATE TABLE $normalTable(c1 string, c2 int, c3 string) STORED BY 'carbondata'") | ||
sql(s"CREATE TABLE $bloomDMSampleTable(c1 string, c2 int, c3 string) STORED BY 'carbondata'") | ||
// load data with empty value | ||
sql(s"INSERT INTO $normalTable SELECT '', 1, 'xxx'") | ||
sql(s"INSERT INTO $bloomDMSampleTable SELECT '', 1, 'xxx'") | ||
sql(s"INSERT INTO $normalTable SELECT '', null, 'xxx'") | ||
sql(s"INSERT INTO $bloomDMSampleTable SELECT '', null, 'xxx'") | ||
|
||
sql( | ||
s""" | ||
| CREATE DATAMAP $dataMapName on table $bloomDMSampleTable | ||
| using 'bloomfilter' | ||
| DMPROPERTIES('index_columns'='c1, c2') | ||
""".stripMargin) | ||
|
||
// load data with empty value | ||
sql(s"INSERT INTO $normalTable SELECT '', 1, 'xxx'") | ||
sql(s"INSERT INTO $bloomDMSampleTable SELECT '', 1, 'xxx'") | ||
sql(s"INSERT INTO $normalTable SELECT '', null, 'xxx'") | ||
sql(s"INSERT INTO $bloomDMSampleTable SELECT '', null, 'xxx'") | ||
|
||
// query on null fields | ||
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable"), | ||
sql(s"SELECT * FROM $normalTable")) | ||
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE c1 = null"), | ||
sql(s"SELECT * FROM $normalTable WHERE c1 = null")) | ||
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE c1 = ''"), | ||
sql(s"SELECT * FROM $normalTable WHERE c1 = ''")) | ||
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE isNull(c1)"), | ||
sql(s"SELECT * FROM $normalTable WHERE isNull(c1)")) | ||
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE isNull(c2)"), | ||
sql(s"SELECT * FROM $normalTable WHERE isNull(c2)")) | ||
sql(s"DROP TABLE IF EXISTS $normalTable") | ||
sql(s"DROP TABLE IF EXISTS $bloomDMSampleTable") | ||
} | ||
|
||
test("create multiple datamaps vs create on datamap on multiple columns") { | ||
val tableName1 = "all_data_types10" | ||
val tableName2 = "all_data_types20" | ||
val dataMapName1 = "dm_with_all_data_types10" | ||
val dataMapName2Prefix = "dm_with_all_data_types2" | ||
sql(s"DROP TABLE IF EXISTS $tableName1") | ||
sql(s"DROP TABLE IF EXISTS $tableName2") | ||
createAllDataTypeTable(tableName1) | ||
createAllDataTypeTable(tableName2) | ||
loadAllDataTypeTable(tableName1) | ||
loadAllDataTypeTable(tableName2) | ||
// create one datamap on multiple index columns | ||
sql( | ||
s""" | ||
| CREATE DATAMAP $dataMapName1 ON TABLE $tableName1 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='booleanField, shortField, intField, bigintField, doubleField, stringField, timestampField, decimalField, dateField, charField, floatField, stringDictField, stringSortField, stringLocalDictField, longStringField' | ||
| ) | ||
""".stripMargin) | ||
// create multiple datamaps each on one index column | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}0 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='booleanField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}1 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='shortField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}2 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='intField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}3 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='bigintField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}4 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='doubleField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}5 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='stringField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}6 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='timestampField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}7 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='decimalField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}8 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='dateField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}9 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='charField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}10 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='floatField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}11 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='stringDictField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}12 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='stringSortField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}13 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='stringLocalDictField' | ||
| ) | ||
""".stripMargin) | ||
sql( | ||
s""" | ||
| CREATE DATAMAP ${dataMapName2Prefix}14 ON TABLE $tableName2 | ||
| USING 'bloomfilter' | ||
| DMPROPERTIES( | ||
| 'INDEX_COLUMNS'='longStringField' | ||
| ) | ||
""".stripMargin) | ||
|
||
loadAllDataTypeTable(tableName1) | ||
loadAllDataTypeTable(tableName2) | ||
assert(sql(s"SHOW DATAMAP ON TABLE $tableName1").collect().length == 1) | ||
assert(sql(s"SHOW DATAMAP ON TABLE $tableName2").collect().length == 15) | ||
checkAnswer(sql(s"SELECT COUNT(*) FROM $tableName1"), sql(s"SELECT COUNT(*) FROM $tableName2")) | ||
checkExistence( | ||
sql(s"EXPLAIN SELECT * FROM $tableName1 WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND doubleField=42.4 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2'"), | ||
true, "bloomfilter", dataMapName1) | ||
val allDataMapsOnTable2 = (0 to 14).map(p => s"$dataMapName2Prefix$p") | ||
val existedString = (allDataMapsOnTable2 :+ "bloomfilter").toArray | ||
checkExistence( | ||
sql(s"EXPLAIN SELECT * FROM $tableName2 WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND doubleField=42.4 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2'"), | ||
true, existedString: _*) | ||
|
||
checkAnswer( | ||
sql(s"SELECT COUNT(*) FROM (SELECT * FROM $tableName1 WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2') b"), | ||
sql(s"SELECT COUNT(*) FROM (SELECT * FROM $tableName2 WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2') b")) | ||
sql(s"DROP TABLE IF EXISTS $tableName1") | ||
sql(s"DROP TABLE IF EXISTS $tableName2") | ||
} | ||
} |