Skip to content

Commit

Permalink
Add Zstandard compression support with JMH benchmarking (#6804)
Browse files: browse the repository at this point in the history
  • Loading branch information
GSharayu committed May 5, 2021
1 parent fe596b6 commit a20535d
Show file tree
Hide file tree
Showing 21 changed files with 1,086 additions and 17 deletions.
4 changes: 4 additions & 0 deletions pinot-common/pom.xml
Expand Up @@ -139,6 +139,10 @@
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
</dependency>
<dependency>
<groupId>com.github.luben</groupId>
<artifactId>zstd-jni</artifactId>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
Expand Down
Expand Up @@ -228,8 +228,8 @@ public void testSerDe()
properties.put("foo", "bar");
properties.put("foobar", "potato");
List<FieldConfig> fieldConfigList = Arrays.asList(
new FieldConfig("column1", FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.INVERTED, properties),
new FieldConfig("column2", null, null, null));
new FieldConfig("column1", FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.INVERTED, null, properties),
new FieldConfig("column2", null, null, null, null));
TableConfig tableConfig = tableConfigBuilder.setFieldConfigList(fieldConfigList).build();

checkFieldConfig(tableConfig);
Expand Down
Expand Up @@ -641,7 +641,7 @@ public void testValidateFieldConfig() {

try {
FieldConfig fieldConfig =
new FieldConfig("myCol1", FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.FST, null);
new FieldConfig("myCol1", FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.FST, null, null);
tableConfig.setFieldConfigList(Arrays.asList(fieldConfig));
TableConfigUtils.validate(tableConfig, schema);
Assert.fail("Should fail for with conflicting encoding type of myCol1");
Expand All @@ -654,7 +654,7 @@ public void testValidateFieldConfig() {
.setNoDictionaryColumns(Arrays.asList("myCol1")).build();
try {
FieldConfig fieldConfig =
new FieldConfig("myCol1", FieldConfig.EncodingType.RAW, FieldConfig.IndexType.FST, null);
new FieldConfig("myCol1", FieldConfig.EncodingType.RAW, FieldConfig.IndexType.FST, null, null);
tableConfig.setFieldConfigList(Arrays.asList(fieldConfig));
TableConfigUtils.validate(tableConfig, schema);
Assert.fail("Should fail since FST index is enabled on RAW encoding type");
Expand All @@ -665,7 +665,7 @@ public void testValidateFieldConfig() {
tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build();
try {
FieldConfig fieldConfig =
new FieldConfig("myCol2", FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.FST, null);
new FieldConfig("myCol2", FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.FST, null, null);
tableConfig.setFieldConfigList(Arrays.asList(fieldConfig));
TableConfigUtils.validate(tableConfig, schema);
Assert.fail("Should fail since FST index is enabled on multi value column");
Expand All @@ -676,7 +676,7 @@ public void testValidateFieldConfig() {
tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build();
try {
FieldConfig fieldConfig =
new FieldConfig("intCol", FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.FST, null);
new FieldConfig("intCol", FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.FST, null, null);
tableConfig.setFieldConfigList(Arrays.asList(fieldConfig));
TableConfigUtils.validate(tableConfig, schema);
Assert.fail("Should fail since FST index is enabled on non String column");
Expand All @@ -688,7 +688,7 @@ public void testValidateFieldConfig() {
.setNoDictionaryColumns(Arrays.asList("myCol2", "intCol")).build();
try {
FieldConfig fieldConfig =
new FieldConfig("myCol2", FieldConfig.EncodingType.RAW, FieldConfig.IndexType.TEXT, null);
new FieldConfig("myCol2", FieldConfig.EncodingType.RAW, FieldConfig.IndexType.TEXT, null, null);
tableConfig.setFieldConfigList(Arrays.asList(fieldConfig));
TableConfigUtils.validate(tableConfig, schema);
Assert.fail("Should fail since TEXT index is enabled on multi value column");
Expand All @@ -700,7 +700,7 @@ public void testValidateFieldConfig() {
.setNoDictionaryColumns(Arrays.asList("myCol2", "intCol")).build();
try {
FieldConfig fieldConfig =
new FieldConfig("intCol", FieldConfig.EncodingType.RAW, FieldConfig.IndexType.TEXT, null);
new FieldConfig("intCol", FieldConfig.EncodingType.RAW, FieldConfig.IndexType.TEXT, null, null);
tableConfig.setFieldConfigList(Arrays.asList(fieldConfig));
TableConfigUtils.validate(tableConfig, schema);
Assert.fail("Should fail since TEXT index is enabled on non String column");
Expand All @@ -712,14 +712,36 @@ public void testValidateFieldConfig() {
.setNoDictionaryColumns(Arrays.asList("myCol1")).build();
try {
FieldConfig fieldConfig =
new FieldConfig("myCol21", FieldConfig.EncodingType.RAW, FieldConfig.IndexType.FST, null);
new FieldConfig("myCol21", FieldConfig.EncodingType.RAW, FieldConfig.IndexType.FST, null, null);
tableConfig.setFieldConfigList(Arrays.asList(fieldConfig));
TableConfigUtils.validate(tableConfig, schema);
Assert.fail("Should fail since field name is not present in schema");
} catch (Exception e) {
Assert.assertEquals(e.getMessage(),
"Column Name myCol21 defined in field config list must be a valid column defined in the schema");
}

tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build();
try {
FieldConfig fieldConfig =
new FieldConfig("intCol", FieldConfig.EncodingType.DICTIONARY, null, FieldConfig.CompressionCodec.SNAPPY, null);
tableConfig.setFieldConfigList(Arrays.asList(fieldConfig));
TableConfigUtils.validate(tableConfig, schema);
Assert.fail("Should fail since dictionary encoding does not support compression codec snappy");
} catch (Exception e) {
Assert.assertEquals(e.getMessage(), "Set compression codec to null for dictionary encoding type");
}

tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build();
try {
FieldConfig fieldConfig =
new FieldConfig("intCol", FieldConfig.EncodingType.DICTIONARY, null, FieldConfig.CompressionCodec.ZSTANDARD, null);
tableConfig.setFieldConfigList(Arrays.asList(fieldConfig));
TableConfigUtils.validate(tableConfig, schema);
Assert.fail("Should fail since dictionary encoding does not support compression codec zstandard");
} catch (Exception e) {
Assert.assertEquals(e.getMessage(), "Set compression codec to null for dictionary encoding type");
}
}

@Test
Expand Down Expand Up @@ -888,7 +910,7 @@ public void testValidateIndexingConfig() {
// expected
}

FieldConfig fieldConfig = new FieldConfig("myCol2", null, null, null);
FieldConfig fieldConfig = new FieldConfig("myCol2", null, null, null, null);
tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
.setFieldConfigList(Arrays.asList(fieldConfig)).build();
try {
Expand Down
Expand Up @@ -156,8 +156,8 @@ private void buildSegment()
List<GenericRow> rows = createTestData(NUM_ROWS);
List<FieldConfig> fieldConfigs = new ArrayList<>();
fieldConfigs
.add(new FieldConfig(DOMAIN_NAMES_COL, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.FST, null));
fieldConfigs.add(new FieldConfig(URL_COL, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.FST, null));
.add(new FieldConfig(DOMAIN_NAMES_COL, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.FST, null, null));
fieldConfigs.add(new FieldConfig(URL_COL, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.FST, null, null));

TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
.setInvertedIndexColumns(Arrays.asList(DOMAIN_NAMES_COL)).setFieldConfigList(fieldConfigs).build();
Expand Down
Expand Up @@ -72,7 +72,7 @@ public class H3IndexQueriesTest extends BaseQueriesTest {
private static final TableConfig TABLE_CONFIG = new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME)
.setFieldConfigList(Collections.singletonList(
new FieldConfig(H3_INDEX_COLUMN, FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.H3,
H3_INDEX_PROPERTIES))).build();
null, H3_INDEX_PROPERTIES))).build();

private IndexSegment _indexSegment;

Expand Down

0 comments on commit a20535d

Please sign in to comment.