Skip to content

Commit

Permalink
[7.15] Fix SearchableSnapshotsPersistentCacheIntegTests (#80077) (#80263
Browse files Browse the repository at this point in the history
)

* Fix SearchableSnapshotsPersistentCacheIntegTests (#80077)

The two tests in SearchableSnapshotsPersistentCacheIntegTests 
failed few times when they were expecting the persistent cache to 
be empty after mounted indices deletions.

While I wasn't able to reproduce the issue (which happened ~5 
times in the last 2 months) I suspect the cause are the same: the 
tests wait for the mounted index to be green but don't always wait 
for the prewarming to be completed. I think it is possible that a 
part of a cache file finishes to be prewarmed after the test is 
completed, making the next step to fail as the persistent cache 
will contain a doc that is not related to the subsequent test.

This commit changes the testCacheSurviveRestart so that it 
waits for the prewarming to complete. Similarly it changes t
estPersistentCacheCleanUpAfterRelocation to also wait for 
the prewarming to complete (just in case, this test verifies that 
the recovery is done). It also logs more information about the 
emptiness of the persistent cache on data nodes and makes 
the cluster scope to TEST to ensure that a dedicated test 
cluster is used for each test.

Closes #76159
Closes #76160

* fixes
  • Loading branch information
tlrx committed Nov 3, 2021
1 parent 78fa1cc commit e5ab3eb
Showing 1 changed file with 41 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

package org.elasticsearch.xpack.searchablesnapshots.cache.full;

import com.carrotsearch.hppc.cursors.ObjectCursor;

import org.apache.lucene.document.Document;
import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse;
Expand All @@ -24,6 +26,7 @@
import org.elasticsearch.repositories.fs.FsRepository;
import org.elasticsearch.snapshots.SnapshotInfo;
import org.elasticsearch.test.BackgroundIndexer;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalTestCluster;
import org.elasticsearch.xpack.searchablesnapshots.BaseSearchableSnapshotsIntegTestCase;
import org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshots;
Expand All @@ -46,6 +49,7 @@
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.notNullValue;

@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST)
public class SearchableSnapshotsPersistentCacheIntegTests extends BaseSearchableSnapshotsIntegTestCase {

@Override
Expand Down Expand Up @@ -87,6 +91,9 @@ public void testCacheSurviveRestart() throws Exception {
);
ensureGreen(restoredIndexName);

assertExecutorIsIdle(SearchableSnapshots.CACHE_FETCH_ASYNC_THREAD_POOL_NAME);
assertExecutorIsIdle(SearchableSnapshots.CACHE_PREWARMING_THREAD_POOL_NAME);

final Index restoredIndex = client().admin()
.cluster()
.prepareState()
Expand Down Expand Up @@ -147,20 +154,20 @@ public Settings onNodeStopped(String nodeName) {
}
});

ensureGreen(restoredIndexName);

assertExecutorIsIdle(SearchableSnapshots.CACHE_FETCH_ASYNC_THREAD_POOL_NAME);
assertExecutorIsIdle(SearchableSnapshots.CACHE_PREWARMING_THREAD_POOL_NAME);

final CacheService cacheServiceAfterRestart = internalCluster().getInstance(CacheService.class, dataNode);
final PersistentCache persistentCacheAfterRestart = cacheServiceAfterRestart.getPersistentCache();
ensureGreen(restoredIndexName);

cacheFiles.forEach(cacheFile -> assertTrue(cacheFile + " should have survived node restart", Files.exists(cacheFile)));
assertThat("Cache files should be loaded in cache", persistentCacheAfterRestart.getNumDocs(), equalTo((long) cacheFiles.size()));

assertAcked(client().admin().indices().prepareDelete(restoredIndexName));

assertBusy(() -> {
cacheFiles.forEach(cacheFile -> assertFalse(cacheFile + " should have been cleaned up", Files.exists(cacheFile)));
cacheServiceAfterRestart.synchronizeCache();
assertThat(persistentCacheAfterRestart.getNumDocs(), equalTo(0L));
});
assertBusy(() -> cacheFiles.forEach(cacheFile -> assertFalse(cacheFile + " should have been cleaned up", Files.exists(cacheFile))));
assertEmptyPersistentCacheOnDataNodes();
}

public void testPersistentCacheCleanUpAfterRelocation() throws Exception {
Expand All @@ -187,6 +194,7 @@ public void testPersistentCacheCleanUpAfterRelocation() throws Exception {
final int numDocs = scaledRandomIntBetween(1_000, 5_000);
try (BackgroundIndexer indexer = new BackgroundIndexer(indexName, "_doc", client(), numDocs)) {
waitForDocs(numDocs, indexer);
indexer.stopAndAwaitStopped();
}
refresh(indexName);

Expand Down Expand Up @@ -214,7 +222,6 @@ public void testPersistentCacheCleanUpAfterRelocation() throws Exception {
.cluster()
.prepareState()
.clear()
.setRoutingTable(true)
.setMetadata(true)
.setIndices(mountedIndexName)
.get();
Expand Down Expand Up @@ -248,6 +255,9 @@ public void testPersistentCacheCleanUpAfterRelocation() throws Exception {

ensureGreen(mountedIndexName);

assertExecutorIsIdle(SearchableSnapshots.CACHE_FETCH_ASYNC_THREAD_POOL_NAME);
assertExecutorIsIdle(SearchableSnapshots.CACHE_PREWARMING_THREAD_POOL_NAME);

recoveryResponse = client().admin().indices().prepareRecoveries(mountedIndexName).get();
assertTrue(recoveryResponse.shardRecoveryStates().containsKey(mountedIndexName));
assertTrue(
Expand All @@ -271,12 +281,29 @@ public void testPersistentCacheCleanUpAfterRelocation() throws Exception {

logger.info("--> deleting mounted index {}", mountedIndex);
assertAcked(client().admin().indices().prepareDelete(mountedIndexName));
assertEmptyPersistentCacheOnDataNodes();
}

assertBusy(() -> {
for (CacheService cacheService : internalCluster().getDataNodeInstances(CacheService.class)) {
cacheService.synchronizeCache();
assertThat(cacheService.getPersistentCache().getNumDocs(), equalTo(0L));
}
});
private void assertEmptyPersistentCacheOnDataNodes() throws Exception {
final Set<DiscoveryNode> dataNodes = new HashSet<>();
for (ObjectCursor<DiscoveryNode> cursor : getDiscoveryNodes().getDataNodes().values()) {
dataNodes.add(cursor.value);
}
logger.info("--> verifying persistent caches are empty on nodes... {}", dataNodes);
try {
assertBusy(() -> {
for (DiscoveryNode node : org.elasticsearch.core.List.copyOf(dataNodes)) {
final CacheService cacheService = internalCluster().getInstance(CacheService.class, node.getName());
cacheService.synchronizeCache();
assertThat(cacheService.getPersistentCache().getNumDocs(), equalTo(0L));
logger.info("--> persistent cache is empty on node {}", node);
dataNodes.remove(node);
}
});
logger.info("--> all persistent caches are empty");
} catch (AssertionError ae) {
logger.error("--> persistent caches not empty on nodes: {}", dataNodes);
throw ae;
}
}
}

0 comments on commit e5ab3eb

Please sign in to comment.