
Commit

Merge d73b0ab into eb7a833
manishnalla1994 committed May 3, 2019
2 parents eb7a833 + d73b0ab commit 3ffcceb
Showing 23 changed files with 1,325 additions and 182 deletions.
@@ -1759,6 +1759,7 @@ private CarbonCommonConstants() {
public static final String ARRAY = "array";
public static final String STRUCT = "struct";
public static final String MAP = "map";
public static final String DECIMAL = "decimal";
public static final String FROM = "from";

/**
@@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.carbondata.core.datastore;

public class RangeValues {
private Object minVal;
private Object maxVal;

public RangeValues(Object minVal, Object maxVal) {
this.minVal = minVal;
this.maxVal = maxVal;
}

public Object getMinVal() {
return this.minVal;
}

public Object getMaxVal() {
return this.maxVal;
}
}
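
The added RangeValues class is a plain holder for one min/max pair. A minimal usage sketch, not part of the commit (the boundary values below are made up):

import org.apache.carbondata.core.datastore.RangeValues;

public class RangeValuesDemo {
  public static void main(String[] args) {
    // Both boundaries are stored as plain Objects, so any comparable value type fits.
    RangeValues range = new RangeValues(10, 500);
    System.out.println("min = " + range.getMinVal() + ", max = " + range.getMaxVal());
  }
}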
@@ -1081,22 +1081,26 @@ public long size() throws IOException {
return dataSize + indexSize;
}

public void processFilterExpression(Expression filterExpression,
boolean[] isFilterDimensions, boolean[] isFilterMeasures) {
QueryModel.FilterProcessVO processVO =
new QueryModel.FilterProcessVO(getDimensionByTableName(getTableName()),
getMeasureByTableName(getTableName()), getImplicitDimensionByTableName(getTableName()));
QueryModel.processFilterExpression(processVO, filterExpression, isFilterDimensions,
isFilterMeasures, this);

public void processFilterExpression(Expression filterExpression, boolean[] isFilterDimensions,
boolean[] isFilterMeasures) {
processFilterExpressionWithoutRange(filterExpression, isFilterDimensions, isFilterMeasures);
if (null != filterExpression) {
// Optimize Filter Expression and fit RANGE filters if conditions apply.
FilterOptimizer rangeFilterOptimizer =
new RangeFilterOptmizer(filterExpression);
FilterOptimizer rangeFilterOptimizer = new RangeFilterOptmizer(filterExpression);
rangeFilterOptimizer.optimizeFilter();
}
}

public void processFilterExpressionWithoutRange(Expression filterExpression,
boolean[] isFilterDimensions, boolean[] isFilterMeasures) {
QueryModel.FilterProcessVO processVO =
new QueryModel.FilterProcessVO(getDimensionByTableName(getTableName()),
getMeasureByTableName(getTableName()), getImplicitDimensionByTableName(getTableName()));
QueryModel
.processFilterExpression(processVO, filterExpression, isFilterDimensions, isFilterMeasures,
this);
}

/**
* Resolve the filter expression.
*/
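
The refactor above separates the marking of filter dimensions/measures (processFilterExpressionWithoutRange) from the optional rewrite of comparison chains into RANGE filters. A hedged sketch of the resulting call pattern; shouldConvertToRangeFilter is a hypothetical caller-side switch (the real one is added to QueryModelBuilder later in this diff):

// Illustrative only: mark filter columns first, then apply the RANGE rewrite only when wanted.
table.processFilterExpressionWithoutRange(filterExpression, isFilterDimensions, isFilterMeasures);
if (shouldConvertToRangeFilter && null != filterExpression) {   // hypothetical flag
  new RangeFilterOptmizer(filterExpression).optimizeFilter();   // same optimizer the table method uses
}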
@@ -33,6 +33,8 @@ public abstract class Expression implements Serializable {
protected List<Expression> children =
new ArrayList<Expression>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);

protected boolean isAlreadyResolved;

public abstract ExpressionResult evaluate(RowIntf value)
throws FilterUnsupportedException, FilterIllegalMemberException;

@@ -52,4 +54,12 @@ public List<Expression> getChildren() {
public abstract String getString();

public abstract String getStatement();

public boolean isAlreadyResolved() {
return isAlreadyResolved;
}

public void setAlreadyResolved(boolean alreadyResolved) {
isAlreadyResolved = alreadyResolved;
}
}
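
The new isAlreadyResolved flag lets a resolver record that an expression's literal filter values have already been replaced by dictionary surrogates, so later passes leave it alone. A hedged sketch of the intended pattern; rangeExpression and the surrogate-filling step are placeholders, not the commit's actual call site:

// Illustrative only: resolve a range expression once, then mark it so it is not resolved again.
if (!rangeExpression.isAlreadyResolved()) {
  // ... replace the expression's literal values with dictionary surrogates (omitted) ...
  rangeExpression.setAlreadyResolved(true);
}
// The resolver change further down checks isAlreadyResolved() alongside the DICTIONARY /
// DIRECT_DICTIONARY encodings, so an already-resolved condition skips re-resolution.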
@@ -377,7 +377,10 @@ private FilterResolverIntf getFilterResolverBasedOnExpressionType(
// getting new dim index.
if (!currentCondExpression.getColumnList().get(0).getCarbonColumn()
.hasEncoding(Encoding.DICTIONARY) || currentCondExpression.getColumnList().get(0)
.getCarbonColumn().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
.getCarbonColumn().hasEncoding(Encoding.DIRECT_DICTIONARY) || currentCondExpression
.isAlreadyResolved()) {
// In case of Range Column Dictionary Include we do not need to resolve the range
// expression as it is already resolved and has the surrogates in the filter value
if (FilterUtil.checkIfExpressionContainsColumn(currentCondExpression.getLeft())
&& FilterUtil.checkIfExpressionContainsColumn(currentCondExpression.getRight()) || (
FilterUtil.checkIfRightExpressionRequireEvaluation(currentCondExpression.getRight())
@@ -1028,6 +1028,37 @@ private static byte[][] getFilterValuesInBytes(ColumnFilterInfo columnFilterInfo
return filterValuesList.toArray(new byte[filterValuesList.size()][]);
}

// This function is used for calculating filter values when the Range Column
// is given as a Dictionary Include Column
private static byte[][] getFilterValueInBytesForDictRange(ColumnFilterInfo columnFilterInfo,
KeyGenerator blockLevelKeyGenerator, int[] dimColumnsCardinality, int[] keys,
List<byte[]> filterValuesList, int keyOrdinalOfDimensionFromCurrentBlock) {
if (null != columnFilterInfo) {
int[] rangesForMaskedByte =
getRangesForMaskedByte(keyOrdinalOfDimensionFromCurrentBlock, blockLevelKeyGenerator);
List<Integer> listOfsurrogates = columnFilterInfo.getFilterList();
if (listOfsurrogates == null || listOfsurrogates.size() > 1) {
throw new RuntimeException(
"Filter values cannot be null in case of range in dictionary include");
}
// Here we only get the first column as there can be only one range column.
try {
if (listOfsurrogates.get(0)
<= dimColumnsCardinality[keyOrdinalOfDimensionFromCurrentBlock]) {
keys[keyOrdinalOfDimensionFromCurrentBlock] = listOfsurrogates.get(0);
} else {
keys[keyOrdinalOfDimensionFromCurrentBlock] =
dimColumnsCardinality[keyOrdinalOfDimensionFromCurrentBlock];
}
filterValuesList
.add(getMaskedKey(rangesForMaskedByte, blockLevelKeyGenerator.generateKey(keys)));
} catch (KeyGenException e) {
LOGGER.error(e.getMessage(), e);
}
}
return filterValuesList.toArray(new byte[filterValuesList.size()][]);
}

/**
* This method will be used to get the Filter key array list for blocks which do not contain
* filter column and the column Encoding is Direct Dictionary
@@ -1057,10 +1088,12 @@ public static byte[][] getKeyArray(ColumnFilterInfo columnFilterInfo, boolean is
* @param columnFilterInfo
* @param carbonDimension
* @param segmentProperties
* @param isDictRange
* @return
*/
public static byte[][] getKeyArray(ColumnFilterInfo columnFilterInfo,
CarbonDimension carbonDimension, SegmentProperties segmentProperties, boolean isExclude) {
CarbonDimension carbonDimension, SegmentProperties segmentProperties, boolean isExclude,
boolean isDictRange) {
if (!carbonDimension.hasEncoding(Encoding.DICTIONARY)) {
return columnFilterInfo.getNoDictionaryFilterValuesList()
.toArray((new byte[columnFilterInfo.getNoDictionaryFilterValuesList().size()][]));
@@ -1071,8 +1104,14 @@ public static byte[][] getKeyArray(ColumnFilterInfo columnFilterInfo,
List<byte[]> filterValuesList = new ArrayList<byte[]>(20);
Arrays.fill(keys, 0);
int keyOrdinalOfDimensionFromCurrentBlock = carbonDimension.getKeyOrdinal();
return getFilterValuesInBytes(columnFilterInfo, isExclude, blockLevelKeyGenerator,
dimColumnsCardinality, keys, filterValuesList, keyOrdinalOfDimensionFromCurrentBlock);
if (!isDictRange) {
return getFilterValuesInBytes(columnFilterInfo, isExclude, blockLevelKeyGenerator,
dimColumnsCardinality, keys, filterValuesList, keyOrdinalOfDimensionFromCurrentBlock);
} else {
// For Dictionary Include Range Column
return getFilterValueInBytesForDictRange(columnFilterInfo, blockLevelKeyGenerator,
dimColumnsCardinality, keys, filterValuesList, keyOrdinalOfDimensionFromCurrentBlock);
}
}

/**
@@ -1500,10 +1539,11 @@ public static void prepareKeysFromSurrogates(ColumnFilterInfo filterValues,
if (filterValues == null) {
dimColumnExecuterInfo.setFilterKeys(new byte[0][]);
} else {
byte[][] keysBasedOnFilter = getKeyArray(filterValues, dimension, segmentProperties, false);
byte[][] keysBasedOnFilter =
getKeyArray(filterValues, dimension, segmentProperties, false, false);
if (!filterValues.isIncludeFilter() || filterValues.isOptimized()) {
dimColumnExecuterInfo
.setExcludeFilterKeys(getKeyArray(filterValues, dimension, segmentProperties, true));
dimColumnExecuterInfo.setExcludeFilterKeys(
getKeyArray(filterValues, dimension, segmentProperties, true, false));
}
dimColumnExecuterInfo.setFilterKeys(keysBasedOnFilter);
}
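
getKeyArray now carries an isDictRange flag: false keeps the existing getFilterValuesInBytes path, while true routes to getFilterValueInBytesForDictRange, which expects a single surrogate (the one range boundary), caps it at the column's cardinality, and generates one masked key. A stand-in sketch of just the capping step, with made-up cardinalities and without the key generator:

// Stand-in values, for illustration only.
int[] dimColumnsCardinality = {1000, 50, 200};   // per-dimension cardinalities of the block
int keyOrdinal = 2;                              // ordinal of the range column in this block
int boundarySurrogate = 512;                     // the single surrogate carried by the filter

// Mirrors the cap in getFilterValueInBytesForDictRange: a surrogate beyond the block's
// cardinality falls back to the highest surrogate that actually exists in the block.
int effectiveSurrogate = Math.min(boundarySurrogate, dimColumnsCardinality[keyOrdinal]);
System.out.println("surrogate used for key generation: " + effectiveSurrogate);   // prints 200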
@@ -300,7 +300,7 @@ public byte[][] getFilterRangeValues(SegmentProperties segmentProperties) {
} else if (null != dimColResolvedFilterInfo.getFilterValues() && dimColResolvedFilterInfo
.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
return FilterUtil.getKeyArray(this.dimColResolvedFilterInfo.getFilterValues(),
this.dimColResolvedFilterInfo.getDimension(), segmentProperties, false);
this.dimColResolvedFilterInfo.getDimension(), segmentProperties, false, false);
}
return null;
}
@@ -89,7 +89,18 @@ public byte[][] getFilterRangeValues(SegmentProperties segmentProperties) {
.getDimensionFromCurrentBlock(this.dimColEvaluatorInfoList.get(0).getDimension());
if (null != dimensionFromCurrentBlock) {
return FilterUtil.getKeyArray(this.dimColEvaluatorInfoList.get(0).getFilterValues(),
dimensionFromCurrentBlock, segmentProperties, false);
dimensionFromCurrentBlock, segmentProperties, false, false);
} else {
return FilterUtil.getKeyArray(this.dimColEvaluatorInfoList.get(0).getFilterValues(), false);
}
} else if (dimColEvaluatorInfoList.size() > 0 && null != dimColEvaluatorInfoList.get(0)
.getFilterValues() && dimColEvaluatorInfoList.get(0).getDimension()
.hasEncoding(Encoding.DICTIONARY)) {
CarbonDimension dimensionFromCurrentBlock = segmentProperties
.getDimensionFromCurrentBlock(this.dimColEvaluatorInfoList.get(0).getDimension());
if (null != dimensionFromCurrentBlock) {
return FilterUtil.getKeyArray(this.dimColEvaluatorInfoList.get(0).getFilterValues(),
dimensionFromCurrentBlock, segmentProperties, false, true);
} else {
return FilterUtil.getKeyArray(this.dimColEvaluatorInfoList.get(0).getFilterValues(), false);
}
@@ -249,6 +260,13 @@ public void resolve(AbsoluteTableIdentifier absoluteTableIdentifier)
} else {
filterInfo.setFilterList(getDirectSurrogateValues(columnExpression));
}
} else if (columnExpression.getDimension().hasEncoding(Encoding.DICTIONARY)
&& !columnExpression.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
if (!isIncludeFilter) {
filterInfo.setExcludeFilterList(getSurrogateValues());
} else {
filterInfo.setFilterList(getSurrogateValues());
}
} else {
filterInfo.setFilterListForNoDictionaryCols(getNoDictionaryRangeValues());
}
@@ -303,6 +321,26 @@ private List<Integer> getDirectSurrogateValues(ColumnExpression columnExpression
return filterValuesList;
}

private List<Integer> getSurrogateValues() throws FilterUnsupportedException {
List<ExpressionResult> listOfExpressionResults = new ArrayList<ExpressionResult>(20);

if (this.getFilterExpression() instanceof BinaryConditionalExpression) {
listOfExpressionResults =
((BinaryConditionalExpression) this.getFilterExpression()).getLiterals();
}
List<Integer> filterValuesList = new ArrayList<Integer>(20);
try {
// If any filter member provided by the user is invalid, throw an error; otherwise the
// system can display an inconsistent result.
for (ExpressionResult result : listOfExpressionResults) {
filterValuesList.add(result.getInt());
}
} catch (FilterIllegalMemberException e) {
throw new FilterUnsupportedException(e);
}
return filterValuesList;
}

/**
* Method will return the DimColumnResolvedFilterInfo instance which consists
* the mapping of the respective dimension and its surrogates involved in
@@ -43,6 +43,7 @@ public class QueryModelBuilder {
private DataTypeConverter dataTypeConverter;
private boolean forcedDetailRawQuery;
private boolean readPageByPage;
private boolean convertToRangeFilter = true;
/**
* log information
*/
@@ -301,6 +302,15 @@ public QueryModelBuilder enableForcedDetailRawQuery() {
return this;
}

public QueryModelBuilder convertToRangeFilter(boolean convertToRangeFilter) {
this.convertToRangeFilter = convertToRangeFilter;
return this;
}

public boolean isConvertToRangeFilter() {
return this.convertToRangeFilter;
}

public void enableReadPageByPage() {
this.readPageByPage = true;
}
@@ -316,7 +326,13 @@ public QueryModel build() {
// set the filter to the query model in order to filter blocklet before scan
boolean[] isFilterDimensions = new boolean[table.getDimensionOrdinalMax()];
boolean[] isFilterMeasures = new boolean[table.getAllMeasures().size()];
table.processFilterExpression(filterExpression, isFilterDimensions, isFilterMeasures);
// In case of Dictionary Include Range Column we do not optimize the range expression
if (isConvertToRangeFilter()) {
table.processFilterExpression(filterExpression, isFilterDimensions, isFilterMeasures);
} else {
table.processFilterExpressionWithoutRange(filterExpression, isFilterDimensions,
isFilterMeasures);
}
queryModel.setIsFilterDimensions(isFilterDimensions);
queryModel.setIsFilterMeasures(isFilterMeasures);
FilterResolverIntf filterIntf =
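
The convertToRangeFilter switch defaults to true, so existing callers keep the RANGE rewrite; passing false makes build() go through processFilterExpressionWithoutRange, leaving an already-resolved dictionary-include range expression untouched. A hedged usage sketch; the builder's construction and the projection/filter setup are assumed, not shown in this diff:

// Illustrative only: disable the RANGE rewrite for a dictionary-include range column query.
QueryModelBuilder builder = new QueryModelBuilder(carbonTable)   // assumed constructor
    .convertToRangeFilter(false);                                // switch added in this commit
// ... set projection and filter expression on the builder as usual (calls omitted) ...
QueryModel queryModel = builder.build();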
@@ -518,19 +518,11 @@ protected byte[][] getNoDictionaryKeyArray(int rowId) {
* @param batchSize
* @return
*/
protected void fillValidRowIdsBatchFilling(int rowId, int batchSize) {
// row id will be different for every batch so clear it before filling
clearValidRowIdList();
int startPosition = rowId;
for (int i = 0; i < batchSize; i++) {
if (!containsDeletedRow(startPosition)) {
validRowIds.add(startPosition);
}
startPosition++;
}
}

private void clearValidRowIdList() {

public abstract void fillValidRowIdsBatchFilling(int rowId, int batchSize);

protected void clearValidRowIdList() {
if (null != validRowIds && !validRowIds.isEmpty()) {
validRowIds.clear();
}
@@ -773,7 +765,30 @@ public int getRowCounter() {
* @param batchSize
* @return
*/
public abstract List<byte[]> getDictionaryKeyArrayBatch(int batchSize);
public List<byte[]> getDictionaryKeyArrayBatch(int batchSize) {
// rowId from where computation needs to start
int startRowId = currentRow + 1;
fillValidRowIdsBatchFilling(startRowId, batchSize);
List<byte[]> dictionaryKeyArrayList = new ArrayList<>(validRowIds.size());
int[] columnDataOffsets = null;
byte[] completeKey = null;
// initialized anew every time, since in case of prefetch the data can be modified
for (int i = 0; i < validRowIds.size(); i++) {
completeKey = new byte[fixedLengthKeySize];
dictionaryKeyArrayList.add(completeKey);
}
// initialize offset array only if data is present
if (this.dictionaryColumnChunkIndexes.length > 0) {
columnDataOffsets = new int[validRowIds.size()];
}
for (int i = 0; i < this.dictionaryColumnChunkIndexes.length; i++) {
for (int j = 0; j < validRowIds.size(); j++) {
columnDataOffsets[j] += dimensionColumnPages[dictionaryColumnChunkIndexes[i]][pageCounter]
.fillRawData(validRowIds.get(j), columnDataOffsets[j], dictionaryKeyArrayList.get(j));
}
}
return dictionaryKeyArrayList;
}

/**
* Below method will be used to get the complex type key array
@@ -806,7 +821,26 @@ public int getRowCounter() {
*
* @return no dictionary keys for all no dictionary dimension
*/
public abstract List<byte[][]> getNoDictionaryKeyArrayBatch(int batchSize);
public List<byte[][]> getNoDictionaryKeyArrayBatch(int batchSize) {
List<byte[][]> noDictionaryKeyArrayList = new ArrayList<>(validRowIds.size());
byte[][] noDictionaryColumnsKeys = null;
// initialized anew every time, since in case of prefetch the data can be modified
for (int i = 0; i < validRowIds.size(); i++) {
noDictionaryColumnsKeys = new byte[noDictionaryColumnChunkIndexes.length][];
noDictionaryKeyArrayList.add(noDictionaryColumnsKeys);
}
int columnPosition = 0;
for (int i = 0; i < this.noDictionaryColumnChunkIndexes.length; i++) {
for (int j = 0; j < validRowIds.size(); j++) {
byte[][] noDictionaryArray = noDictionaryKeyArrayList.get(j);
noDictionaryArray[columnPosition] =
dimensionColumnPages[noDictionaryColumnChunkIndexes[i]][pageCounter]
.getChunkData(validRowIds.get(j));
}
columnPosition++;
}
return noDictionaryKeyArrayList;
}

/**
* Mark the filtered rows in columnar batch. These rows will not be added to vector batches later.
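
getDictionaryKeyArrayBatch and getNoDictionaryKeyArrayBatch were pulled up into this base class: getDictionaryKeyArrayBatch first calls fillValidRowIdsBatchFilling (now abstract, so each subclass decides how deleted rows are skipped) to collect the surviving row ids, and getNoDictionaryKeyArrayBatch then reuses that validRowIds list; both allocate one output buffer per surviving row and fill it column chunk by column chunk. A self-contained stand-in of that fill pattern, with plain int arrays in place of the column-page machinery:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class BatchFillSketch {
  public static void main(String[] args) {
    int[] validRowIds = {3, 4, 7};   // rows that survived the delete-delta check (hypothetical)
    int[][] columnData = {{1, 2, 3, 4, 5, 6, 7, 8}, {10, 20, 30, 40, 50, 60, 70, 80}};

    // One output buffer per valid row, allocated up front.
    List<int[]> keyArrayBatch = new ArrayList<>(validRowIds.length);
    for (int i = 0; i < validRowIds.length; i++) {
      keyArrayBatch.add(new int[columnData.length]);
    }
    // Outer loop over column chunks, inner loop over valid rows, as in the methods above.
    for (int col = 0; col < columnData.length; col++) {
      for (int row = 0; row < validRowIds.length; row++) {
        keyArrayBatch.get(row)[col] = columnData[col][validRowIds[row]];
      }
    }
    System.out.println(Arrays.toString(keyArrayBatch.get(0)));   // [4, 40]
  }
}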
