Skip to content

Commit

Permalink
Support adding local dictionary configuration in create table stateme…
Browse files Browse the repository at this point in the history
…nt and show the configs in describe formatted table

What changes were proposed in this pull request?
In this PR, in order to support local dictionary:

create table changes are made to support local dictionary configurations as table properties
the local dictionary properties are shown in the describe formatted command, depending on whether local dictionary is enabled or disabled.
Highlights:
basically we will have four properties

LOCAL_DICTIONARY_ENABLE => whether to enable or disable local dictionary
LOCAL_DICTIONARY_THRESHOLD => threshold property for the column to generate local dictionary
LOCAL_DICTIONARY_INCLUDE => columns for which local dictionary needs to be generated
LOCAL_DICTIONARY_EXCLUDE => columns for which local dictionary should not be generated

This closes #2375
  • Loading branch information
akashrn5 authored and kumarvishal09 committed Jun 19, 2018
1 parent ca466d9 commit be20fef
Show file tree
Hide file tree
Showing 10 changed files with 417 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -910,6 +910,37 @@ public final class CarbonCommonConstants {
public static final String COLUMN_GROUPS = "column_groups";
public static final String DICTIONARY_EXCLUDE = "dictionary_exclude";
public static final String DICTIONARY_INCLUDE = "dictionary_include";

/**
 * Table property key used to enable or disable local dictionary generation for a table.
 */
public static final String LOCAL_DICTIONARY_ENABLE = "local_dictionary_enable";

/**
 * Default value for the {@link #LOCAL_DICTIONARY_ENABLE} table property.
 */
public static final String LOCAL_DICTIONARY_ENABLE_DEFAULT = "true";

/**
 * Table property key holding the threshold value for local dictionary generation.
 */
public static final String LOCAL_DICTIONARY_THRESHOLD = "local_dictionary_threshold";

/**
 * Default value for the {@link #LOCAL_DICTIONARY_THRESHOLD} table property. It is kept as a
 * String, like the table property value itself, and has to be parsed by the consumer.
 */
public static final String LOCAL_DICTIONARY_THRESHOLD_DEFAULT = "1000";

/**
 * Table property key listing the columns for which local dictionary needs to be generated.
 */
public static final String LOCAL_DICTIONARY_INCLUDE = "local_dictionary_include";

/**
 * Table property key listing the columns for which local dictionary should not be generated.
 */
public static final String LOCAL_DICTIONARY_EXCLUDE = "local_dictionary_exclude";

/**
* key for dictionary path
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.carbondata.core.metadata.schema.table.TableSchema;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.metadata.schema.table.column.ParentColumnTableRelation;
import org.apache.carbondata.core.util.CarbonUtil;

/**
* Thrift schema to carbon schema converter and vice versa
Expand Down Expand Up @@ -594,6 +595,10 @@ private PartitionInfo fromExternalToWrapperPartitionInfo(
.getTable_columns()) {
listOfColumns.add(fromExternalToWrapperColumnSchema(externalColumnSchema));
}
if (null != externalTableSchema.tableProperties) {
CarbonUtil
.setLocalDictColumnsToWrapperSchema(listOfColumns, externalTableSchema.tableProperties);
}
wrapperTableSchema.setListOfColumns(listOfColumns);
wrapperTableSchema.setSchemaEvolution(
fromExternalToWrapperSchemaEvolution(externalTableSchema.getSchema_evolution()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,16 @@ public class CarbonTable implements Serializable {

private boolean hasDataMapSchema;

/**
 * Whether local dictionary generation is enabled for the table.
 * Set through {@code setLocalDictionaryEnabled}.
 */
private boolean isLocalDictionaryEnabled;

/**
 * Local dictionary generation threshold of the table.
 * Set through {@code setLocalDictionaryThreshold}.
 */
private int localDictionaryThreshold;

/**
* The boolean field which points if the data written for Non Transactional Table
* or Transactional Table.
Expand Down Expand Up @@ -467,6 +477,37 @@ public String getTableUniqueName() {
return tableUniqueName;
}

/**
 * Tells whether local dictionary generation is enabled for this table.
 *
 * @return true if local dictionary generation is enabled, false otherwise
 */
public boolean isLocalDictionaryEnabled() {
  return this.isLocalDictionaryEnabled;
}

/**
 * Records whether local dictionary generation is enabled for this table.
 *
 * @param localDictionaryEnabled true to mark local dictionary generation as enabled
 */
public void setLocalDictionaryEnabled(boolean localDictionaryEnabled) {
  this.isLocalDictionaryEnabled = localDictionaryEnabled;
}

/**
 * Returns the local dictionary generation threshold stored for this table.
 *
 * @return local dictionary generation threshold
 */
public int getLocalDictionaryThreshold() {
  return this.localDictionaryThreshold;
}

/**
 * Stores the local dictionary generation threshold for this table.
 *
 * @param localDictionaryThreshold threshold value to store
 */
public void setLocalDictionaryThreshold(int localDictionaryThreshold) {
  this.localDictionaryThreshold = localDictionaryThreshold;
}

/**
* build table unique name
* all should call this method to build table unique name
Expand Down Expand Up @@ -1045,5 +1086,32 @@ public static void updateTableByTableInfo(CarbonTable table, TableInfo tableInfo
}
table.hasDataMapSchema =
null != tableInfo.getDataMapSchemaList() && tableInfo.getDataMapSchemaList().size() > 0;
setLocalDictInfo(table, tableInfo);
}

/**
 * Copies the local dictionary settings from the fact table properties onto the table.
 * When the local_dictionary_enable property is absent, local dictionary is disabled and the
 * threshold is left untouched. When it is present, the threshold is taken from the
 * local_dictionary_threshold property, falling back to the default threshold constant.
 *
 * @param table     table whose local dictionary flags are populated
 * @param tableInfo table info whose fact table properties are read
 */
private static void setLocalDictInfo(CarbonTable table, TableInfo tableInfo) {
  String enabledProperty = tableInfo.getFactTable().getTableProperties()
      .get(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE);
  String thresholdProperty = tableInfo.getFactTable().getTableProperties()
      .get(CarbonCommonConstants.LOCAL_DICTIONARY_THRESHOLD);
  if (null == enabledProperty) {
    // old tables do not carry the local dictionary enable property in tableProperties,
    // so local dictionary generation stays disabled for them
    table.setLocalDictionaryEnabled(false);
    return;
  }
  table.setLocalDictionaryEnabled(Boolean.parseBoolean(enabledProperty));
  // use the configured threshold when present, otherwise the default one
  String thresholdToApply = (null != thresholdProperty)
      ? thresholdProperty
      : CarbonCommonConstants.LOCAL_DICTIONARY_THRESHOLD_DEFAULT;
  table.setLocalDictionaryThreshold(Integer.parseInt(thresholdToApply));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,25 @@ public class ColumnSchema implements Serializable, Writable {
*/
private String timeSeriesFunction = "";

/**
 * Whether this column is a local dictionary column; false unless explicitly set.
 */
private boolean isLocalDictColumn = false;

/**
 * Tells whether this column is marked as a local dictionary column.
 *
 * @return true if the column is a local dictionary column
 */
public boolean isLocalDictColumn() {
  return this.isLocalDictColumn;
}

/**
 * Marks or unmarks this column as a local dictionary column.
 *
 * @param localDictColumn whether column is local dictionary column
 */
public void setLocalDictColumn(boolean localDictColumn) {
  this.isLocalDictColumn = localDictColumn;
}

/**
* @return the columnName
*/
Expand Down
108 changes: 108 additions & 0 deletions core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -3004,5 +3004,113 @@ public static String getBlockId(AbsoluteTableIdentifier identifier, String fileP
}
return blockId;
}

/**
 * Sets the local dictionary columns on the wrapper schema, driven by the table properties
 * local_dictionary_enable, local_dictionary_include and local_dictionary_exclude.
 * Nothing is changed unless local_dictionary_enable is present and parses to true.
 *
 * A column qualifies as a local dictionary column only if it is a no dictionary string
 * datatype dimension column. Which of the qualifying columns get marked:
 * 1. include not defined, exclude not defined: all qualifying columns
 * 2. include not defined, exclude defined: all qualifying columns except the ones listed in
 *    local dictionary exclude
 * 3. & 4. include defined (exclude defined or not): only the qualifying columns listed in
 *    local dictionary include; the exclude list is not consulted
 *
 * Handling for complexTypes:
 * The column structure is flat. If a parent column having child columns is listed in local
 * dictionary include, the next getNumberOfChild() columns in the list are treated as its
 * children and each of them which is of string datatype is marked as local dictionary column.
 *
 * Column names are matched against the comma separated property values ignoring case and
 * surrounding whitespace.
 *
 * @param columns             flat list of column schemas to update in place
 * @param mainTableProperties table properties of the main table
 */
public static void setLocalDictColumnsToWrapperSchema(List<ColumnSchema> columns,
    Map<String, String> mainTableProperties) {
  if (null == columns || null == mainTableProperties) {
    return;
  }
  // Boolean.parseBoolean(null) is false, so a missing property means "not enabled"
  if (!Boolean.parseBoolean(
      mainTableProperties.get(CarbonCommonConstants.LOCAL_DICTIONARY_ENABLE))) {
    return;
  }
  String localDictIncludeColumnsOfMainTable =
      mainTableProperties.get(CarbonCommonConstants.LOCAL_DICTIONARY_INCLUDE);
  String localDictExcludeColumnsOfMainTable =
      mainTableProperties.get(CarbonCommonConstants.LOCAL_DICTIONARY_EXCLUDE);
  // split the property values once instead of once per column
  String[] listOfDictionaryIncludeColumns = null == localDictIncludeColumnsOfMainTable
      ? null
      : localDictIncludeColumnsOfMainTable.split(",");
  String[] listOfDictionaryExcludeColumns = null == localDictExcludeColumnsOfMainTable
      ? null
      : localDictExcludeColumnsOfMainTable.split(",");
  // number of upcoming columns that are children of an included complex parent column
  int childColumnCount = 0;
  for (ColumnSchema column : columns) {
    // for complex type columns, user gives the parent column as local dictionary column and
    // only the string primitive type child column is set as local dictionary column
    if (childColumnCount > 0) {
      if (column.getDataType().equals(DataTypes.STRING)) {
        column.setLocalDictColumn(true);
      }
      childColumnCount -= 1;
    }
    if (null != listOfDictionaryIncludeColumns) {
      // include is defined: only the listed columns are considered, exclude is ignored
      if (containsColumnIgnoreCase(listOfDictionaryIncludeColumns, column.getColumnName())) {
        if (column.getNumberOfChild() > 0) {
          // complex parent column: remember how many following columns are its children
          childColumnCount = column.getNumberOfChild();
        }
        if (isNoDictionaryStringDimension(column)) {
          column.setLocalDictColumn(true);
        }
      }
    } else if (isNoDictionaryStringDimension(column)) {
      // include is not defined: mark every qualifying column, unless exclude is defined and
      // lists this column. The column must be absent from the WHOLE exclude list; comparing
      // against one entry at a time would mark excluded columns as soon as the list has
      // more than one entry.
      if (null == listOfDictionaryExcludeColumns || !containsColumnIgnoreCase(
          listOfDictionaryExcludeColumns, column.getColumnName())) {
        column.setLocalDictColumn(true);
      }
    }
  }
}

/**
 * Checks whether the column is a no dictionary string datatype dimension column.
 */
private static boolean isNoDictionaryStringDimension(ColumnSchema column) {
  return column.isDimensionColumn() && column.getDataType().equals(DataTypes.STRING)
      && !column.hasEncoding(Encoding.DICTIONARY);
}

/**
 * Checks whether columnName matches any entry of columnNames, ignoring case and the
 * whitespace around each entry.
 */
private static boolean containsColumnIgnoreCase(String[] columnNames, String columnName) {
  for (String candidate : columnNames) {
    if (candidate.trim().equalsIgnoreCase(columnName)) {
      return true;
    }
  }
  return false;
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ class TestDescribeTable extends QueryTest with BeforeAndAfterAll {
// Checks the col_name values returned by "desc formatted" on table desc1 and the total
// number of rows the command returns.
// NOTE(review): this listing appears to be a diff view that shows both the old and the new
// variant of two lines (the Seq continuation line and the count assertion); presumably only
// the second variant of each pair exists in the real file - confirm before editing.
test("test describe formatted table desc1") {

// expected col_name values
val resultCol = Seq("", "", "##Detailed Column property", "##Detailed Table Information", "ADAPTIVE", "CARBON Store Path", "Comment", "Database Name", "Last Update Time",
"SORT_COLUMNS", "SORT_SCOPE", "Streaming", "Table Block Size", "Table Data Size", "Table Index Size", "Table Name", "dec2col1", "dec2col2", "dec2col3", "dec2col4")
"SORT_COLUMNS", "SORT_SCOPE", "Streaming", "Table Block Size", "Local Dictionary Enabled", "Local Dictionary Threshold","Table Data Size", "Table Index Size", "Table Name", "dec2col1", "dec2col2", "dec2col3", "dec2col4")
// each expected name is left-justified and padded to 36 characters before comparison
val resultRow: Seq[Row] = resultCol map(propName => Row(f"$propName%-36s"))
checkAnswer(sql("desc formatted DESC1").select("col_name"), resultRow)
assert(sql("desc formatted desc1").count() == 20)
assert(sql("desc formatted desc1").count() == 22)
}

test("test describe formatted for partition table") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionary
import org.apache.carbondata.core.metadata.ColumnIdentifier
import org.apache.carbondata.core.metadata.datatype.{DataType => CarbonDataType, DataTypes => CarbonDataTypes, StructField => CarbonStructField}
import org.apache.carbondata.core.metadata.encoder.Encoding
import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, DataMapSchema, DataMapSchemaStorageProvider}
import org.apache.carbondata.core.metadata.schema.table.{CarbonTable, DataMapSchema}
import org.apache.carbondata.core.metadata.schema.table.column.{CarbonColumn, ColumnSchema}
import org.apache.carbondata.core.util.DataTypeUtil
import org.apache.carbondata.processing.exception.DataLoadingException
Expand Down

0 comments on commit be20fef

Please sign in to comment.