Skip to content

Commit

Permalink
Moving zstd out of sandbox (opensearch-project#7908)
Browse files Browse the repository at this point in the history
* Adding zstd module to source

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* Removing zstd module from sandbox

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* Added tests and refactoring

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* Fixing gradle issues

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* flaky test

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* fixing precommit failure

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* Incorporate review comments and fixed precommit failures

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* Incorporating review comments

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* Incorporating review comments

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* Adding Integ tests

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* Incorporating review comments

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

---------

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>
Signed-off-by: Daniel (dB.) Doubrovkine <dblock@amazon.com>
Co-authored-by: Daniel (dB.) Doubrovkine <dblock@amazon.com>
  • Loading branch information
2 people authored and baba-devv committed Jul 29, 2023
1 parent d322fd7 commit 0e8e8dc
Show file tree
Hide file tree
Showing 21 changed files with 524 additions and 102 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Snapshot Interop] Add Changes in Create Snapshot Flow for remote store interoperability. ([#7118](https://github.com/opensearch-project/OpenSearch/pull/7118))
- Allow insecure string settings to warn-log usage and advise migration to a newer secure variant ([#5496](https://github.com/opensearch-project/OpenSearch/pull/5496))
- Add self-organizing hash table to improve the performance of bucket aggregations ([#7652](https://github.com/opensearch-project/OpenSearch/pull/7652))
- Move ZSTD compression codecs out of the sandbox ([#7908](https://github.com/opensearch-project/OpenSearch/pull/7908))

### Deprecated

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec;

import org.opensearch.action.admin.indices.flush.FlushResponse;
import org.opensearch.action.admin.indices.refresh.RefreshResponse;
import org.opensearch.action.admin.indices.segments.IndicesSegmentsRequest;
import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.opensearch.action.support.ActiveShardCount;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.engine.Segment;
import org.opensearch.index.reindex.BulkByScrollResponse;
import org.opensearch.index.reindex.ReindexAction;
import org.opensearch.index.reindex.ReindexRequestBuilder;
import org.opensearch.index.reindex.ReindexTestCase;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static java.util.stream.Collectors.toList;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_METADATA;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_READ;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_WRITE;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_READ_ONLY;
import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_READ_ONLY_ALLOW_DELETE;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures;

/**
 * Integration test that fills a source index with segments written under several different
 * {@code index.codec} values and then reindexes it into a destination index, once for each
 * codec used as the destination codec.
 */
public class MultiCodecReindexIT extends ReindexTestCase {

    /**
     * Runs the reindex scenario once per entry of the codec map, using that entry as the
     * codec of the destination index.
     */
    public void testReindexingMultipleCodecs() throws InterruptedException, ExecutionException {
        internalCluster().ensureAtLeastNumDataNodes(1);
        // Key: value applied to the "index.codec" setting.
        // Value: string expected to show up among the attributes of segments written with that codec.
        Map<String, String> codecMap = Map.of(
            "best_compression",
            "BEST_COMPRESSION",
            "zstd_no_dict",
            "ZSTD_NO_DICT",
            "zstd",
            "ZSTD",
            "default",
            "BEST_SPEED"
        );

        for (Map.Entry<String, String> codec : codecMap.entrySet()) {
            assertReindexingWithMultipleCodecs(codec.getKey(), codec.getValue(), codecMap);
        }
    }

    /**
     * Creates a source index, ingests one batch of documents per codec in {@code codecMap},
     * reindexes everything into a destination index configured with {@code destCodec} and
     * verifies the reindex counters and the attributes of the destination segments.
     *
     * @param destCodec     value of {@code index.codec} for the destination index; also used as
     *                      suffix of the source and destination index names
     * @param destCodecMode attribute value every destination segment is expected to carry
     * @param codecMap      codec setting value mapped to the segment attribute value expected for it
     */
    private void assertReindexingWithMultipleCodecs(String destCodec, String destCodecMode, Map<String, String> codecMap)
        throws ExecutionException, InterruptedException {

        final String index = "test-index" + destCodec;
        final String destIndex = "dest-index" + destCodec;

        // creating source index
        createIndex(
            index,
            Settings.builder()
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
                .put("index.codec", "default")
                // NOTE(review): presumably keeps segments from being merged together so that the
                // segments written by each codec survive until the assertion below - confirm.
                .put("index.merge.policy.max_merged_segment", "1b")
                .build()
        );
        ensureGreen(index);

        final int nbDocs = randomIntBetween(2, 5);

        // indexing with all 4 codecs
        for (Map.Entry<String, String> codec : codecMap.entrySet()) {
            useCodec(index, codec.getKey());
            ingestDocs(index, nbDocs);
        }

        assertTrue(
            "source index [" + index + "] should contain segments for every codec mode " + codecMap.values(),
            getSegments(index).stream()
                .flatMap(s -> s.getAttributes().values().stream())
                .collect(Collectors.toSet())
                .containsAll(codecMap.values())
        );

        // creating destination index with destination codec
        createIndex(
            destIndex,
            Settings.builder()
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
                .put("index.codec", destCodec)
                .build()
        );
        // Same wait as for the source index: do not start reindexing before the destination shard is started.
        ensureGreen(destIndex);

        BulkByScrollResponse bulkResponse = new ReindexRequestBuilder(client(), ReindexAction.INSTANCE).source(index)
            .destination(destIndex)
            .refresh(true)
            .waitForActiveShards(ActiveShardCount.ONE)
            .get();

        // One batch of nbDocs documents was ingested per codec.
        final int expectedDocs = codecMap.size() * nbDocs;
        assertEquals(expectedDocs, bulkResponse.getCreated());
        assertEquals(expectedDocs, bulkResponse.getTotal());
        assertEquals(0, bulkResponse.getDeleted());
        assertEquals(0, bulkResponse.getNoops());
        assertEquals(0, bulkResponse.getVersionConflicts());
        assertEquals(1, bulkResponse.getBatches());
        assertTrue(bulkResponse.getTook().getMillis() > 0);
        assertEquals(0, bulkResponse.getBulkFailures().size());
        assertEquals(0, bulkResponse.getSearchFailures().size());
        assertTrue(
            "every segment of [" + destIndex + "] should carry the attribute value [" + destCodecMode + "]",
            getSegments(destIndex).stream().allMatch(segment -> segment.getAttributes().containsValue(destCodecMode))
        );
    }

    /**
     * Switches {@code index} to {@code codec} by closing the index, updating the
     * {@code index.codec} setting and reopening it.
     *
     * @param index name of the index to update
     * @param codec new value for the {@code index.codec} setting
     */
    private void useCodec(String index, String codec) throws ExecutionException, InterruptedException {
        assertAcked(client().admin().indices().prepareClose(index));

        assertAcked(
            client().admin()
                .indices()
                .updateSettings(new UpdateSettingsRequest(index).settings(Settings.builder().put("index.codec", codec)))
                .get()
        );

        assertAcked(client().admin().indices().prepareOpen(index));
        // Wait for the reopened shard to be started again before callers index into or flush the index.
        ensureGreen(index);
    }

    /**
     * Force-flushes and refreshes {@code index} once for each index block setting listed below,
     * with that block enabled during the calls, and asserts that neither request reports failures.
     *
     * @param index name of the index to flush and refresh
     */
    private void flushAndRefreshIndex(String index) {

        // Flush and refresh are asserted to succeed while each of these blocks is in place.
        for (String blockSetting : Arrays.asList(
            SETTING_BLOCKS_READ,
            SETTING_BLOCKS_WRITE,
            SETTING_READ_ONLY,
            SETTING_BLOCKS_METADATA,
            SETTING_READ_ONLY_ALLOW_DELETE
        )) {
            try {
                enableIndexBlock(index, blockSetting);
                // flush
                FlushResponse flushResponse = client().admin().indices().prepareFlush(index).setForce(true).execute().actionGet();
                assertNoFailures(flushResponse);

                // refresh
                RefreshResponse refreshResponse = client().admin().indices().prepareRefresh(index).execute().actionGet();
                assertNoFailures(refreshResponse);
            } finally {
                // Always lift the block again so later indexing and settings updates are not rejected.
                disableIndexBlock(index, blockSetting);
            }
        }
    }

    /**
     * Indexes {@code nbDocs} documents with random UUID ids and a single {@code num} field into
     * {@code index}, then flushes and refreshes the index.
     *
     * @param index  name of the index to write to
     * @param nbDocs number of documents to index
     */
    private void ingestDocs(String index, int nbDocs) throws InterruptedException {

        indexRandom(
            randomBoolean(),
            false,
            randomBoolean(),
            IntStream.range(0, nbDocs)
                .mapToObj(i -> client().prepareIndex(index).setId(UUID.randomUUID().toString()).setSource("num", i))
                .collect(toList())
        );
        flushAndRefreshIndex(index);
    }

    /**
     * Fetches the segments of {@code index} through the indices segments API.
     * Only the first shard copy listed under key {@code 0} is inspected; the indices in this
     * test are created with one shard and no replicas.
     *
     * @param index name of the index to inspect
     * @return a new list holding the segments of that shard copy
     */
    private ArrayList<Segment> getSegments(String index) {

        return new ArrayList<>(
            client().admin()
                .indices()
                .segments(new IndicesSegmentsRequest(index))
                .actionGet()
                .getIndices()
                .get(index)
                .getShards()
                .get(0)
                .getShards()[0].getSegments()
        );
    }

}
28 changes: 0 additions & 28 deletions sandbox/plugins/custom-codecs/build.gradle

This file was deleted.

This file was deleted.

This file was deleted.

0 comments on commit 0e8e8dc

Please sign in to comment.