Skip to content

Commit

Permalink
Merge 5c7645e into d5a2c69
Browse files Browse the repository at this point in the history
  • Loading branch information
shardul-cr7 committed Dec 27, 2018
2 parents d5a2c69 + 5c7645e commit df517f9
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 5 deletions.
4 changes: 3 additions & 1 deletion docs/ddl-of-carbondata.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,11 @@ CarbonData DDL statements are documented here,which includes:

By default, the inverted index is disabled because this reduces the store size; it can be enabled by using a table property. Enabling it might help to improve compression ratio and query speed, especially for low-cardinality columns which are in reward position.
Suggested use case: for high-cardinality columns, you can disable the inverted index to improve data loading performance.

**NOTE**: Columns specified in INVERTED_INDEX should also be present in SORT_COLUMNS.

```
TBLPROPERTIES ('NO_INVERTED_INDEX'='column1', 'INVERTED_INDEX'='column2, column3')
TBLPROPERTIES ('SORT_COLUMNS'='column2,column3','NO_INVERTED_INDEX'='column1', 'INVERTED_INDEX'='column2, column3')
```

- ##### Sort Columns Configuration
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll {
CREATE TABLE IF NOT EXISTS index1
(id Int, name String, city String)
STORED BY 'org.apache.carbondata.format'
TBLPROPERTIES('DICTIONARY_INCLUDE'='id','INVERTED_INDEX'='city,name')
TBLPROPERTIES('DICTIONARY_INCLUDE'='id','INVERTED_INDEX'='city,name', 'SORT_COLUMNS'='city,name')
""")
sql(
s"""
Expand Down Expand Up @@ -333,14 +333,14 @@ class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll {
CREATE TABLE IF NOT EXISTS index1
(id Int, name String, city String)
STORED BY 'org.apache.carbondata.format'
TBLPROPERTIES('INVERTED_INDEX'='city,name,id')
TBLPROPERTIES('INVERTED_INDEX'='city,name,id','SORT_COLUMNS'='city,name,id')
""")
val carbonTable = CarbonMetadata.getInstance().getCarbonTable("default", "index1")
assert(carbonTable.getColumnByName("index1", "city").getColumnSchema.getEncodingList
.contains(Encoding.INVERTED_INDEX))
assert(carbonTable.getColumnByName("index1", "name").getColumnSchema.getEncodingList
.contains(Encoding.INVERTED_INDEX))
assert(!carbonTable.getColumnByName("index1", "id").getColumnSchema.getEncodingList
assert(carbonTable.getColumnByName("index1", "id").getColumnSchema.getEncodingList
.contains(Encoding.INVERTED_INDEX))
}

Expand All @@ -352,7 +352,7 @@ class TestNoInvertedIndexLoadAndQuery extends QueryTest with BeforeAndAfterAll {
CREATE TABLE IF NOT EXISTS index1
(id Int, name String, city String)
STORED BY 'org.apache.carbondata.format'
TBLPROPERTIES('NO_INVERTED_INDEX'='city','INVERTED_INDEX'='city')
TBLPROPERTIES('NO_INVERTED_INDEX'='city','INVERTED_INDEX'='city','SORT_COLUMNS'='city')
""")
}
assert(exception.getMessage
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,19 @@ class VarcharDataTypesBasicTestCase extends QueryTest with BeforeAndAfterEach wi
assert(exceptionCaught.getMessage.contains("both in no_inverted_index and long_string_columns"))
}

test("inverted index columns cannot be present in long_string_cols as they do not support sort_cols") {
  // The DDL below lists 'note' under both inverted_index and long_string_columns
  // and declares no SORT_COLUMNS, so table creation is expected to be rejected.
  val createTableDdl =
    s"""
       | CREATE TABLE if not exists $longStringTable(
       | id INT, name STRING, description STRING, address STRING, note STRING
       | ) STORED BY 'carbondata'
       | TBLPROPERTIES('inverted_index'='note', 'long_string_columns'='note,description')
       |""".stripMargin

  val thrown = intercept[MalformedCarbonCommandException] {
    sql(createTableDdl)
  }

  // The failure must name the offending column and point the user at SORT_COLUMNS.
  val expectedFragment = "INVERTED_INDEX column: note should be present in SORT_COLUMNS"
  assert(thrown.getMessage.contains(expectedFragment))
}

private def prepareTable(): Unit = {
sql(
s"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,16 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
// get inverted index columns from table properties
val invertedIdxCols = extractInvertedIndexColumns(fields, tableProperties)

// Validate if columns present in inverted index are part of sort columns.
if (invertedIdxCols.nonEmpty) {
invertedIdxCols.foreach { column =>
if (!sortKeyDims.contains(column)) {
val errMsg = "INVERTED_INDEX column: " + column + " should be present in SORT_COLUMNS"
throw new MalformedCarbonCommandException(errMsg)
}
}
}

// check for any duplicate columns in inverted and noinverted columns defined in tblproperties
if (invertedIdxCols.nonEmpty && noInvertedIdxCols.nonEmpty) {
invertedIdxCols.foreach { distCol =>
Expand Down

0 comments on commit df517f9

Please sign in to comment.