Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CARBONDATA-2635][BloomDataMap] Support different index datamaps on same column #2405

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ protected final CarbonTable getMainTable() {
return mainTable;
}

protected final DataMapSchema getDataMapSchema() {
public final DataMapSchema getDataMapSchema() {
return dataMapSchema;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,32 +225,6 @@ class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll {
sql("DROP TABLE IF EXISTS datamap_test3")
}

// Verifies that creating a second lucene datamap on an already-indexed column is rejected
// with a MalformedDataMapCommandException carrying the expected message.
test("test lucene fine grain data map for create datamap with Duplicate Columns") {
  sql("DROP TABLE IF EXISTS datamap_test_table")
  sql(
    """
      | CREATE TABLE datamap_test_table(id INT, name STRING, city STRING, age INT)
      | STORED BY 'carbondata'
      | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='LOCAL_SORT')
      |""".stripMargin)
  // Setup: the first datamap on 'name' is expected to succeed, so it stays outside intercept.
  // An unexpected failure here surfaces directly instead of being captured as the expected error.
  sql(
    """
      | CREATE DATAMAP dm ON TABLE datamap_test_table
      | USING 'lucene'
      | DMProperties('INDEX_COLUMNS'='name')
      |""".stripMargin)
  // Only the duplicate creation is expected to throw.
  val duplicateColumnError = intercept[MalformedDataMapCommandException] {
    sql(
      """
        | CREATE DATAMAP dm1 ON TABLE datamap_test_table
        | USING 'lucene'
        | DMProperties('INDEX_COLUMNS'='name')
        |""".stripMargin)
  }
  assertResult("column 'name' already has datamap created")(duplicateColumnError.getMessage)
  sql("drop datamap if exists dm on table datamap_test_table")
}

test("test lucene fine grain data map with wildcard matching ") {
sql("DROP TABLE IF EXISTS datamap_test_table")
sql(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,57 @@ class TestDataMapCommand extends QueryTest with BeforeAndAfterAll {
sql("drop table main")
}

// Verifies that two index datamaps of the same provider (lucene) cannot be created on the
// same column: the second CREATE DATAMAP must fail with the provider-specific message.
test("test create datamap: unable to create same index datamap for one column") {
  sql("DROP TABLE IF EXISTS datamap_test_table")
  sql(
    """
      | CREATE TABLE datamap_test_table(id INT, name STRING, city STRING, age INT)
      | STORED BY 'carbondata'
      | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='LOCAL_SORT')
      |""".stripMargin)
  // Setup: the first lucene datamap on 'name' is expected to succeed, so it stays outside
  // intercept. A failure here surfaces directly rather than being captured as the expected one.
  sql(
    """
      | CREATE DATAMAP dm ON TABLE datamap_test_table
      | USING 'lucene'
      | DMProperties('INDEX_COLUMNS'='name')
      |""".stripMargin)
  // Only the second lucene datamap on the same column is expected to throw.
  val duplicateColumnError = intercept[MalformedDataMapCommandException] {
    sql(
      """
        | CREATE DATAMAP dm1 ON TABLE datamap_test_table
        | USING 'lucene'
        | DMProperties('INDEX_COLUMNS'='name')
        |""".stripMargin)
  }
  assertResult("column 'name' already has lucene index datamap created")(
    duplicateColumnError.getMessage)
  sql("drop table if exists datamap_test_table")
}

test("test create datamap: able to create different index datamap for one column") {
sql("DROP TABLE IF EXISTS datamap_test_table")
sql(
"""
| CREATE TABLE datamap_test_table(id INT, name STRING, city STRING, age INT)
| STORED BY 'carbondata'
| TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='LOCAL_SORT')
""".stripMargin)
sql(
s"""
| CREATE DATAMAP dm ON TABLE datamap_test_table
| USING 'lucene'
| DMProperties('INDEX_COLUMNS'='name')
""".stripMargin)
sql(
s"""
| CREATE DATAMAP dm1 ON TABLE datamap_test_table
| USING 'bloomfilter'
| DMProperties('INDEX_COLUMNS'='name')
""".stripMargin)
sql("show datamap on table datamap_test_table").show(false)
checkExistence(sql("show datamap on table datamap_test_table"), true, "dm", "dm1", "lucene", "bloomfilter")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also add an assertion that checks which datamap is selected for the query, using an EXPLAIN statement?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently in CarbonData, if a table has multiple index datamaps, the EXPLAIN result shows only the first datamap; the others are not shown.
PR #2411 has been raised to solve this problem.

sql("drop table if exists datamap_test_table")
}

override def afterAll {
sql("DROP TABLE IF EXISTS maintable")
sql("drop table if exists uniqdata")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,7 @@
*/
package org.apache.spark.sql.execution.command.datamap

import java.util

import scala.collection.JavaConverters._
import scala.collection.mutable

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
Expand Down Expand Up @@ -99,17 +96,22 @@ case class CarbonCreateDataMapCommand(
dataMapProvider match {
case provider: IndexDataMapProvider =>
val datamaps = DataMapStoreManager.getInstance.getAllDataMap(mainTable).asScala
val existingIndexColumn = mutable.Set[String]()
datamaps.foreach { datamap =>
datamap.getDataMapSchema.getIndexColumns.foreach(existingIndexColumn.add)
}
val thisDmProviderName =
dataMapProvider.asInstanceOf[IndexDataMapProvider].getDataMapSchema.getProviderName
val existingIndexColumn4ThisProvider = datamaps.filter { datamap =>
thisDmProviderName.equalsIgnoreCase(datamap.getDataMapSchema.getProviderName)
}.flatMap { datamap =>
datamap.getDataMapSchema.getIndexColumns
}.distinct

provider.getIndexedColumns.asScala.foreach { column =>
if (existingIndexColumn.contains(column.getColName)) {
if (existingIndexColumn4ThisProvider.contains(column.getColName)) {
throw new MalformedDataMapCommandException(String.format(
"column '%s' already has datamap created", column.getColName))
"column '%s' already has %s index datamap created",
column.getColName, thisDmProviderName))
}
}

val operationContext: OperationContext = new OperationContext()
val systemFolderLocation: String = CarbonProperties.getInstance().getSystemFolderLocation
val createDataMapPreExecutionEvent: CreateDataMapPreExecutionEvent =
Expand Down