Skip to content

Commit

Permalink
Fixed Compaction for Complex types with Dictionary Include
Browse files Browse the repository at this point in the history
  • Loading branch information
manishnalla1994 committed Dec 24, 2018
1 parent 0855361 commit 0045616
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 37 deletions.
10 changes: 10 additions & 0 deletions integration/spark-common-test/src/test/resources/structofarray.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Cust00000000000000000000,2015,1,20,M,SSC,Y,123456789$2015-01-01 00:00:00$100&3000$100.123&3000.234$United Kingdom&England$2015-01-01 00:00:00&2014-01-01 00:00:00,42,104,160,325046028.8,859616748.6
Cust00000000000000000001,2015,1,30,F,Degree,N,123456790$2015-01-02 00:00:00$101&3000$101.123&3001.234$United States&MO$2015-01-02 00:00:00&2014-01-02 00:00:00,141,181,54,378476092.1,818599132.6
Cust00000000000000000002,2015,1,40,M,graduation,D,123456791$2015-01-03 00:00:00$102&3000$102.123&3002.234$United States&OR$2015-01-03 00:00:00&2014-01-03 00:00:00,138,43,175,408335001.4,906020942.6
Cust00000000000000000003,2015,1,50,F,PG,Y,123456792$2015-01-04 00:00:00$103&3000$103.123&3003.234$Australia&Victoria$2015-01-04 00:00:00&2014-01-04 00:00:00,96,63,184,493146274.5,556184083.3
Cust00000000000000000004,2015,1,60,M,MS,N,123456793$2015-01-05 00:00:00$104&3000$104.123&3004.234$United States&AL$2015-01-05 00:00:00&2014-01-05 00:00:00,115,172,165,457941392.3,641744932.5
Cust00000000000000000005,2015,1,70,F,Doctor,D,123456794$2015-01-06 00:00:00$105&3000$105.123&3005.234$United States&NJ$2015-01-06 00:00:00&2014-01-06 00:00:00,178,192,178,112452170.2,502438883.3
Cust00000000000000000006,2015,1,80,M,Layer,Y,123456795$2015-01-07 00:00:00$106&3000$106.123&3006.234$United States&IL$2015-01-07 00:00:00&2014-01-07 00:00:00,172,194,49,943273831.2,37711205.33
Cust00000000000000000007,2015,1,90,F,Cop,N,123456796$2015-01-08 00:00:00$107&3000$107.123&3007.234$United States&TN$2015-01-08 00:00:00&2014-01-08 00:00:00,163,23,180,991766321.3,452456856.7
Cust00000000000000000008,2015,1,95,M,Bank,D,123456797$2015-01-09 00:00:00$108&3000$108.123&3008.234$Israel&Tel Aviv$2015-01-09 00:00:00&2014-01-09 00:00:00,113,18,176,747561503.5,388896200.6
Cust00000000000000000009,2015,1,45,F,Group1,Y,123456798$2015-01-10 00:00:00$109&3000$109.123&3009.234$France&Ile-de-France$2015-01-10 00:00:00&2014-01-10 00:00:00,50,99,10,667010292.4,910085933.7
Original file line number Diff line number Diff line change
Expand Up @@ -1068,4 +1068,40 @@ class TestCompactionComplexType extends QueryTest with BeforeAndAfterAll {
sql("Drop table if exists adaptive")
}

// Regression test: major compaction must succeed on a table whose complex column
// (a struct containing array children) is listed in DICTIONARY_INCLUDE.
test("Test major compaction for struct of array type") {
// Start from a clean slate in case an earlier run left the table behind.
sql("DROP TABLE IF EXISTS carbon")
// STRUCT_OF_ARRAY is a struct with two primitive fields and four array fields; it is
// dictionary-included together with the DEPOSIT and HQ_DEPOSIT double columns.
sql(
"CREATE TABLE carbon(CUST_ID string,YEAR int, MONTH int, AGE int, GENDER string,EDUCATED " +
"string,IS_MARRIED " +
"string," +
"STRUCT_OF_ARRAY struct<ID:int,CHECK_DATE:string,SNo:array<int>,sal1:array<double>," +
"state:array<string>," +
"date1:array<string>>,CARD_COUNT int,DEBIT_COUNT int,CREDIT_COUNT int, DEPOSIT double, " +
"HQ_DEPOSIT double) STORED BY 'carbondata'" +
"TBLPROPERTIES('DICTIONARY_INCLUDE'='STRUCT_OF_ARRAY,DEPOSIT,HQ_DEPOSIT')")
// Load the same CSV three times so that there are three loads to compact.
// In the file, '$' separates the struct fields and '&' separates the array elements.
sql(
s"LOAD DATA LOCAL INPATH '$resourcesPath/structofarray.csv' INTO TABLE carbon OPTIONS" +
s"('DELIMITER'=',','QUOTECHAR'='\'," +
"'FILEHEADER'='CUST_ID,YEAR,MONTH,AGE, GENDER,EDUCATED,IS_MARRIED,STRUCT_OF_ARRAY," +
"CARD_COUNT," +
"DEBIT_COUNT,CREDIT_COUNT, DEPOSIT,HQ_DEPOSIT','COMPLEX_DELIMITER_LEVEL_1'='$', " +
"'COMPLEX_DELIMITER_LEVEL_2'='&')")
// Second load of the same file.
sql(
s"LOAD DATA LOCAL INPATH '$resourcesPath/structofarray.csv' INTO TABLE carbon OPTIONS" +
s"('DELIMITER'=',','QUOTECHAR'='\'," +
"'FILEHEADER'='CUST_ID,YEAR,MONTH,AGE, GENDER,EDUCATED,IS_MARRIED,STRUCT_OF_ARRAY," +
"CARD_COUNT," +
"DEBIT_COUNT,CREDIT_COUNT, DEPOSIT,HQ_DEPOSIT','COMPLEX_DELIMITER_LEVEL_1'='$', " +
"'COMPLEX_DELIMITER_LEVEL_2'='&')")
// Third load; its FILEHEADER differs from the previous two only in whitespace
// ("AGE,GENDER" instead of "AGE, GENDER").
sql(
s"LOAD DATA LOCAL INPATH '$resourcesPath/structofarray.csv' INTO TABLE carbon OPTIONS" +
s"('DELIMITER'=',','QUOTECHAR'='\'," +
"'FILEHEADER'='CUST_ID,YEAR,MONTH,AGE,GENDER,EDUCATED,IS_MARRIED,STRUCT_OF_ARRAY," +
"CARD_COUNT," +
"DEBIT_COUNT,CREDIT_COUNT, DEPOSIT,HQ_DEPOSIT','COMPLEX_DELIMITER_LEVEL_1'='$', " +
"'COMPLEX_DELIMITER_LEVEL_2'='&')")
// Run the major compaction under test.
sql("ALTER TABLE carbon COMPACT 'major'")
// structofarray.csv has 10 rows, so three loads must still yield 30 rows after compaction.
checkAnswer(sql("Select count(*) from carbon"), Row(30))
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@

import org.apache.carbondata.core.datastore.TableSpec;
import org.apache.carbondata.core.dictionary.service.DictionaryServiceProvider;
import org.apache.carbondata.core.keygenerator.KeyGenerator;
import org.apache.carbondata.core.keygenerator.factory.KeyGeneratorFactory;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.encoder.Encoding;
Expand Down Expand Up @@ -381,35 +379,11 @@ public boolean[] getSortColumnMapping() {
return sortColumnMapping;
}

public int[] calcDimensionLengths() {
int[] dimLensWithComplex = getCardinalityFinder().getCardinality();
if (!isSortTable()) {
for (int i = 0; i < dimLensWithComplex.length; i++) {
if (dimLensWithComplex[i] != 0) {
dimLensWithComplex[i] = Integer.MAX_VALUE;
}
}
}
List<Integer> dimsLenList = new ArrayList<Integer>();
for (int eachDimLen : dimLensWithComplex) {
if (eachDimLen != 0) dimsLenList.add(eachDimLen);
}
int[] dimLens = new int[dimsLenList.size()];
for (int i = 0; i < dimsLenList.size(); i++) {
dimLens[i] = dimsLenList.get(i);
}
return dimLens;
public int[] getCardinalityForComplexDimension() {
return getCardinalityFinder().getCardinality();
}

public KeyGenerator[] createKeyGeneratorForComplexDimension() {
int[] dimLens = calcDimensionLengths();
KeyGenerator[] complexKeyGenerators = new KeyGenerator[dimLens.length];
for (int i = 0; i < dimLens.length; i++) {
complexKeyGenerators[i] =
KeyGeneratorFactory.getKeyGenerator(new int[] { dimLens[i] });
}
return complexKeyGenerators;
}


public TableSpec getTableSpec() {
return tableSpec;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,9 @@ public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel(
SegmentProperties segmentProperties =
new SegmentProperties(wrapperColumnSchema, colCardinality);

int[] dimLens = configuration.calcDimensionLengths();
int[] dimLens = CarbonDataProcessorUtil
.calcDimensionLengths(configuration.getNumberOfSortColumns(),
configuration.getCardinalityForComplexDimension());

int dimensionCount = configuration.getDimensionCount();
int noDictionaryCount = configuration.getNoDictionaryCount();
Expand Down Expand Up @@ -273,8 +275,9 @@ public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel(
carbonFactDataHandlerModel.setCarbonDataFileAttributes(carbonDataFileAttributes);
carbonFactDataHandlerModel.setCarbonDataDirectoryPath(carbonDataDirectoryPath);
carbonFactDataHandlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
carbonFactDataHandlerModel.setComplexDimensionKeyGenerator(
configuration.createKeyGeneratorForComplexDimension());
carbonFactDataHandlerModel.setComplexDimensionKeyGenerator(CarbonDataProcessorUtil
.createKeyGeneratorForComplexDimension(configuration.getNumberOfSortColumns(),
configuration.getCardinalityForComplexDimension()));
carbonFactDataHandlerModel.bucketId = bucketId;
carbonFactDataHandlerModel.segmentId = configuration.getSegmentId();
carbonFactDataHandlerModel.taskExtension = taskExtension;
Expand Down Expand Up @@ -356,9 +359,20 @@ public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoa
.getColumnSchemaList(carbonTable.getDimensionByTableName(tableName),
carbonTable.getMeasureByTableName(tableName));
carbonFactDataHandlerModel.setWrapperColumnSchema(wrapperColumnSchema);
// get the cardinality for all all the columns including no dictionary columns
int[] formattedCardinality = CarbonUtil
.getFormattedCardinality(segmentProperties.getDimColumnsCardinality(), wrapperColumnSchema);
// get the cardinality for all the columns, including no
// dictionary columns and complex columns
int[] dimAndComplexColumnCardinality =
new int[segmentProperties.getDimColumnsCardinality().length + segmentProperties
.getComplexDimColumnCardinality().length];
for (int i = 0; i < segmentProperties.getDimColumnsCardinality().length; i++) {
dimAndComplexColumnCardinality[i] = segmentProperties.getDimColumnsCardinality()[i];
}
for (int i = 0; i < segmentProperties.getComplexDimColumnCardinality().length; i++) {
dimAndComplexColumnCardinality[segmentProperties.getDimColumnsCardinality().length + i] =
segmentProperties.getComplexDimColumnCardinality()[i];
}
int[] formattedCardinality =
CarbonUtil.getFormattedCardinality(dimAndComplexColumnCardinality, wrapperColumnSchema);
carbonFactDataHandlerModel.setColCardinality(formattedCardinality);

carbonFactDataHandlerModel.setComplexIndexMap(
Expand All @@ -376,6 +390,9 @@ public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoa
carbonFactDataHandlerModel.setPrimitiveDimLens(segmentProperties.getDimColumnsCardinality());
carbonFactDataHandlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
carbonFactDataHandlerModel.setColumnCompressor(loadModel.getColumnCompressor());
carbonFactDataHandlerModel.setComplexDimensionKeyGenerator(CarbonDataProcessorUtil
.createKeyGeneratorForComplexDimension(carbonTable.getNumberOfSortColumns(),
segmentProperties.getComplexDimColumnCardinality()));

carbonFactDataHandlerModel.tableSpec = new TableSpec(carbonTable);
DataMapWriterListener listener = new DataMapWriterListener();
Expand Down Expand Up @@ -419,7 +436,7 @@ private static Map<Integer, GenericDataType> convertComplexDimensionToComplexInd

private static Map<Integer, GenericDataType> getComplexMap(String isNullFormat,
int simpleDimsCount, DataField[] dataFields) {
int surrIndex = simpleDimsCount;
int surrIndex = 0;
Iterator<Map.Entry<String, GenericDataType>> complexMap =
CarbonDataProcessorUtil.getComplexTypesMap(dataFields, isNullFormat).entrySet().iterator();
Map<Integer, GenericDataType> complexIndexMap = new HashMap<>(dataFields.length);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ public final class CarbonFactHandlerFactory {
/**
* Creating fact handler to write data.
* @param model
* @param handlerType
* @return
*/
public static CarbonFactHandler createCarbonFactHandler(CarbonFactDataHandlerModel model) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.constants.SortScopeOptions;
import org.apache.carbondata.core.keygenerator.KeyGenerator;
import org.apache.carbondata.core.keygenerator.factory.KeyGeneratorFactory;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.encoder.Encoding;
Expand Down Expand Up @@ -699,4 +701,33 @@ public static List<CarbonIterator<Object[]>>[] partitionInputReaderIterators(
return iterators;
}

/**
 * Computes the dimension lengths for the given cardinality array.
 *
 * <p>Entries of {@code complexCardinality} equal to 0 are dropped; the remaining entries are
 * returned in their original order. When {@code numberOfSortColumns} is not positive, every
 * retained entry is reported as {@link Integer#MAX_VALUE} instead of its actual value.
 *
 * <p>The input array is never modified. Earlier versions overwrote its non-zero entries with
 * {@link Integer#MAX_VALUE} in the no-sort-column case, which silently altered arrays that
 * callers share with other components.
 *
 * @param numberOfSortColumns number of sort columns; a value {@code <= 0} forces
 *                            {@link Integer#MAX_VALUE} for every non-zero entry
 * @param complexCardinality  cardinality array to filter; must not be null
 * @return a newly allocated array with one length per non-zero cardinality entry
 */
public static int[] calcDimensionLengths(int numberOfSortColumns, int[] complexCardinality) {
  boolean useMaxLength = numberOfSortColumns <= 0;

  // First pass: size the result so no boxing or intermediate list is needed.
  int nonZeroCount = 0;
  for (int cardinality : complexCardinality) {
    if (cardinality != 0) {
      nonZeroCount++;
    }
  }

  // Second pass: copy the retained entries, substituting the maximum when there is no sort
  // column. The substitution is applied to the result only, never to the caller's array.
  int[] dimLens = new int[nonZeroCount];
  int next = 0;
  for (int cardinality : complexCardinality) {
    if (cardinality != 0) {
      dimLens[next++] = useMaxLength ? Integer.MAX_VALUE : cardinality;
    }
  }
  return dimLens;
}

/**
 * Builds one key generator per dimension length returned by
 * {@link #calcDimensionLengths(int, int[])} for the given arguments.
 *
 * @param numberOfSortColumns number of sort columns, forwarded to
 *                            {@code calcDimensionLengths}
 * @param complexCardinality  cardinality array, forwarded to {@code calcDimensionLengths}
 * @return an array of key generators, each created from a single-element length array
 */
public static KeyGenerator[] createKeyGeneratorForComplexDimension(int numberOfSortColumns,
    int[] complexCardinality) {
  final int[] lengths = calcDimensionLengths(numberOfSortColumns, complexCardinality);
  final KeyGenerator[] generators = new KeyGenerator[lengths.length];
  int slot = 0;
  for (int length : lengths) {
    // Each generator is built from its own one-element length array.
    generators[slot] = KeyGeneratorFactory.getKeyGenerator(new int[] { length });
    slot++;
  }
  return generators;
}

}

0 comments on commit 0045616

Please sign in to comment.