Skip to content

Commit

Permalink
Merge 111cb61 into f66a9fd
Browse files Browse the repository at this point in the history
  • Loading branch information
xuchuanyin committed Aug 15, 2018
2 parents f66a9fd + 111cb61 commit d604607
Showing 1 changed file with 385 additions and 0 deletions.
@@ -0,0 +1,385 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.carbondata.cluster.sdv.generated

import org.apache.spark.sql.Row
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach}

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties

class BloomFilterDataMapTestCase extends QueryTest with BeforeAndAfterEach with BeforeAndAfterAll {

override protected def beforeAll(): Unit = {
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
"yyyy-MM-dd")
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
"yyyy-MM-dd HH:mm:ss")

}

override protected def afterAll(): Unit = {
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT,
CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT)
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
}

private def createAllDataTypeTable(tableName: String): Unit = {
sql(s"DROP TABLE IF EXISTS $tableName")
sql(
s"""
| CREATE TABLE $tableName(
| booleanField boolean,
| shortField smallint,
| intField int,
| bigintField bigint,
| doubleField double,
| stringField string,
| timestampField timestamp,
| decimalField decimal(18,2),
| dateField date,
| charField string,
| floatField float,
| stringDictField string,
| stringSortField string,
| stringLocalDictField string,
| longStringField string
| )
| STORED BY 'carbondata'
| TBLPROPERTIES(
| 'LONG_STRING_COLUMNS'='longStringField',
| 'SORT_COLUMNS'='stringSortField',
| 'DICTIONARY_INCLUDE'='stringDictField',
| 'local_dictionary_enable'='true',
| 'local_dictionary_threshold'='10000',
| 'local_dictionary_include'='stringLocalDictField',
| 'CACHE_LEVEL'='BLOCKLET')
""".stripMargin)
}

private def loadAllDataTypeTable(tableName: String): Unit = {
sql(
s"""
| INSERT INTO TABLE $tableName VALUES
| (true,1,11,101,41.4,'string1','2015-04-23 12:01:01',12.34,'2015-04-23','aaa',1.5,'dict1','sort1','local_dict1','longstring1'),
| (false,2,12,102,42.4,'string2','2015-05-23 12:01:03',23.45,'2015-05-23','bbb',2.5,'dict2','sort2','local_dict2','longstring2'),
| (true,3,13,163,43.4,'string3','2015-07-26 12:01:06',34.56,'2015-07-26','ccc',3.5,'dict3','sort3','local_dict3','longstring3'),
| (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
""".stripMargin)
sql(
s"""
| INSERT INTO TABLE $tableName VALUES
| (true,${Short.MaxValue - 2},${Int.MinValue + 2},${Long.MaxValue - 2},${Double.MinValue + 2},'string1','2015-04-23 12:01:01',${Double.MinValue + 2},'2015-04-23','aaa',${Float.MaxValue - 2},'dict1','sort1','local_dict1','longstring1'),
| (false,2,12,102,42.4,'string2','2015-05-23 12:01:03',23.45,'2015-05-23','bbb',2.5,'dict2','sort2','local_dict2','longstring2'),
| (true,${Short.MinValue + 2},${Int.MaxValue - 2},${Long.MinValue + 2},${Double.MaxValue - 2},'string3','2015-07-26 12:01:06',${Double.MinValue + 2},'2015-07-26','ccc',${Float.MinValue + 2},'dict3','sort3','local_dict3','longstring3'),
| (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
""".stripMargin)
}

test("create bloomfilter datamap on all datatypes") {
val tableName = "all_data_types"
val dataMapName = "dm_with_all_data_types"
createAllDataTypeTable(tableName)
loadAllDataTypeTable(tableName)
// create datamap on all supported datatype
sql(
s"""
| CREATE DATAMAP $dataMapName ON TABLE $tableName
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='booleanField, shortField, intField, bigintField, doubleField, stringField, timestampField, decimalField, dateField, charField, floatField, stringDictField, stringSortField, stringLocalDictField, longStringField',
| 'BLOOM_SIZE'='6400',
| 'BLOOM_FPP'='0.001',
| 'BLOOM_COMPRESS'='TRUE')
""".stripMargin)
loadAllDataTypeTable(tableName)
checkExistence(sql(s"SHOW DATAMAP ON TABLE $tableName"), true, "bloomfilter", dataMapName)
checkAnswer(sql(s"SELECT COUNT(*) FROM $tableName"), Seq(Row(16)))
checkExistence(
sql(s"EXPLAIN SELECT * FROM $tableName WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND doubleField=42.4 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2'"),
true, "bloomfilter", dataMapName)
checkAnswer(
sql(s"SELECT COUNT(*) FROM (SELECT * FROM $tableName WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2') b"),
Seq(Row(4)))
sql(s"DROP TABLE IF EXISTS $tableName")
}

test("create bloomfilter datamap on all datatypes with sort columns") {
val tableName = "all_data_types_with_sort_column"
val dataMapName = "dm_with_all_data_types_with_sort_column"
sql(s"DROP TABLE IF EXISTS $tableName")
// double/float/decimal/longstring cannot be sort_columns so we ignore them in SORT_COLUMNS
sql(
s"""
| CREATE TABLE $tableName(
| booleanField boolean,
| shortField smallint,
| intField int,
| bigintField bigint,
| doubleField double,
| stringField string,
| timestampField timestamp,
| decimalField decimal(18,2),
| dateField date,
| charField string,
| floatField float,
| stringDictField string,
| stringSortField string,
| stringLocalDictField string,
| longStringField string
| )
| STORED BY 'carbondata'
| TBLPROPERTIES(
| 'LONG_STRING_COLUMNS'='longStringField',
| 'DICTIONARY_INCLUDE'='stringDictField',
| 'local_dictionary_enable'='true',
| 'local_dictionary_threshold'='10000',
| 'local_dictionary_include'='stringLocalDictField',
| 'SORT_COLUMNS'='booleanField, shortField, intField, bigintField, stringField, timestampField, dateField, charField, stringDictField, stringSortField, stringLocalDictField')
""".stripMargin)
loadAllDataTypeTable(tableName)
// create datamap on all supported datatype
sql(
s"""
| CREATE DATAMAP $dataMapName ON TABLE $tableName
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='booleanField, shortField, intField, bigintField, doubleField, stringField, timestampField, decimalField, dateField, charField, floatField, stringDictField, stringSortField, stringLocalDictField, longStringField',
| 'BLOOM_SIZE'='6400',
| 'BLOOM_FPP'='0.001',
| 'BLOOM_COMPRESS'='TRUE')
""".stripMargin)
loadAllDataTypeTable(tableName)
checkExistence(sql(s"SHOW DATAMAP ON TABLE $tableName"), true, "bloomfilter", dataMapName)
checkAnswer(sql(s"SELECT COUNT(*) FROM $tableName"), Seq(Row(16)))
checkExistence(
sql(s"EXPLAIN SELECT * FROM $tableName WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND doubleField=42.4 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2'"),
true, "bloomfilter", dataMapName)
checkAnswer(
sql(s"SELECT COUNT(*) FROM (SELECT * FROM $tableName WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2') b"),
Seq(Row(4)))
sql(s"DROP TABLE IF EXISTS $tableName")
}

test("test bloom datamap with empty values on index column") {
val normalTable = "normal_table"
val bloomDMSampleTable = "bloom_table"
val dataMapName = "bloom_datamap"
sql(s"DROP TABLE IF EXISTS $normalTable")
sql(s"DROP TABLE IF EXISTS $bloomDMSampleTable")
sql(s"CREATE TABLE $normalTable(c1 string, c2 int, c3 string) STORED BY 'carbondata'")
sql(s"CREATE TABLE $bloomDMSampleTable(c1 string, c2 int, c3 string) STORED BY 'carbondata'")
// load data with empty value
sql(s"INSERT INTO $normalTable SELECT '', 1, 'xxx'")
sql(s"INSERT INTO $bloomDMSampleTable SELECT '', 1, 'xxx'")
sql(s"INSERT INTO $normalTable SELECT '', null, 'xxx'")
sql(s"INSERT INTO $bloomDMSampleTable SELECT '', null, 'xxx'")

sql(
s"""
| CREATE DATAMAP $dataMapName on table $bloomDMSampleTable
| using 'bloomfilter'
| DMPROPERTIES('index_columns'='c1, c2')
""".stripMargin)

// load data with empty value
sql(s"INSERT INTO $normalTable SELECT '', 1, 'xxx'")
sql(s"INSERT INTO $bloomDMSampleTable SELECT '', 1, 'xxx'")
sql(s"INSERT INTO $normalTable SELECT '', null, 'xxx'")
sql(s"INSERT INTO $bloomDMSampleTable SELECT '', null, 'xxx'")

// query on null fields
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable"),
sql(s"SELECT * FROM $normalTable"))
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE c1 = null"),
sql(s"SELECT * FROM $normalTable WHERE c1 = null"))
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE c1 = ''"),
sql(s"SELECT * FROM $normalTable WHERE c1 = ''"))
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE isNull(c1)"),
sql(s"SELECT * FROM $normalTable WHERE isNull(c1)"))
checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE isNull(c2)"),
sql(s"SELECT * FROM $normalTable WHERE isNull(c2)"))
sql(s"DROP TABLE IF EXISTS $normalTable")
sql(s"DROP TABLE IF EXISTS $bloomDMSampleTable")
}

test("create multiple datamaps vs create on datamap on multiple columns") {
val tableName1 = "all_data_types10"
val tableName2 = "all_data_types20"
val dataMapName1 = "dm_with_all_data_types10"
val dataMapName2Prefix = "dm_with_all_data_types2"
sql(s"DROP TABLE IF EXISTS $tableName1")
sql(s"DROP TABLE IF EXISTS $tableName2")
createAllDataTypeTable(tableName1)
createAllDataTypeTable(tableName2)
loadAllDataTypeTable(tableName1)
loadAllDataTypeTable(tableName2)
// create one datamap on multiple index columns
sql(
s"""
| CREATE DATAMAP $dataMapName1 ON TABLE $tableName1
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='booleanField, shortField, intField, bigintField, doubleField, stringField, timestampField, decimalField, dateField, charField, floatField, stringDictField, stringSortField, stringLocalDictField, longStringField'
| )
""".stripMargin)
// create multiple datamaps each on one index column
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}0 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='booleanField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}1 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='shortField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}2 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='intField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}3 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='bigintField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}4 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='doubleField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}5 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='stringField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}6 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='timestampField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}7 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='decimalField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}8 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='dateField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}9 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='charField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}10 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='floatField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}11 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='stringDictField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}12 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='stringSortField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}13 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='stringLocalDictField'
| )
""".stripMargin)
sql(
s"""
| CREATE DATAMAP ${dataMapName2Prefix}14 ON TABLE $tableName2
| USING 'bloomfilter'
| DMPROPERTIES(
| 'INDEX_COLUMNS'='longStringField'
| )
""".stripMargin)

loadAllDataTypeTable(tableName1)
loadAllDataTypeTable(tableName2)
assert(sql(s"SHOW DATAMAP ON TABLE $tableName1").collect().length == 1)
assert(sql(s"SHOW DATAMAP ON TABLE $tableName2").collect().length == 15)
checkAnswer(sql(s"SELECT COUNT(*) FROM $tableName1"), sql(s"SELECT COUNT(*) FROM $tableName2"))
checkExistence(
sql(s"EXPLAIN SELECT * FROM $tableName1 WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND doubleField=42.4 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2'"),
true, "bloomfilter", dataMapName1)
val allDataMapsOnTable2 = (0 to 14).map(p => s"$dataMapName2Prefix$p")
val existedString = (allDataMapsOnTable2 :+ "bloomfilter").toArray
checkExistence(
sql(s"EXPLAIN SELECT * FROM $tableName2 WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND doubleField=42.4 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2'"),
true, existedString: _*)

checkAnswer(
sql(s"SELECT COUNT(*) FROM (SELECT * FROM $tableName1 WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2') b"),
sql(s"SELECT COUNT(*) FROM (SELECT * FROM $tableName2 WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2') b"))
sql(s"DROP TABLE IF EXISTS $tableName1")
sql(s"DROP TABLE IF EXISTS $tableName2")
}
}

0 comments on commit d604607

Please sign in to comment.