Skip to content

Commit

Permalink
Fix PrimaryAllocationIT Race Condition (elastic#37355)
Browse files Browse the repository at this point in the history
* Fix PrimaryAllocationIT Race Condition

* Forcing a stale primary allocation on a green index was tripping the assertion that was removed
   * Added a test that this case still errors out correctly
* Made the ability to wipe stopped datanode's data public on the internal test cluster and used it to ensure correct behaviour on the fixed test
   * Previously it simply passed because the test finished before the index went green and would NPE when the index was green at the time of the shard store status request, that would then come up empty
* Closes elastic#37345
  • Loading branch information
original-brownbear committed Jan 11, 2019
1 parent 359222c commit 63fe3c6
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 5 deletions.
Expand Up @@ -113,7 +113,11 @@ private void verifyThenSubmitUpdate(ClusterRerouteRequest request, ActionListene
for (Map.Entry<String, List<AbstractAllocateAllocationCommand>> entry : stalePrimaryAllocations.entrySet()) {
final String index = entry.getKey();
final ImmutableOpenIntMap<List<IndicesShardStoresResponse.StoreStatus>> indexStatus = status.get(index);
assert indexStatus != null;
if (indexStatus == null) {
// The index in the stale primary allocation request was green and hence filtered out by the store status
// request. We ignore it here since the relevant exception will be thrown by the reroute action later on.
continue;
}
for (AbstractAllocateAllocationCommand command : entry.getValue()) {
final List<IndicesShardStoresResponse.StoreStatus> shardStatus =
indexStatus.get(command.shardId());
Expand Down
Expand Up @@ -265,7 +265,6 @@ public void testForceStaleReplicaToBePromotedToPrimary() throws Exception {
assertThat(newHistoryUUIds, hasSize(1));
}

@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/37345")
public void testForceStaleReplicaToBePromotedToPrimaryOnWrongNode() throws Exception {
String master = internalCluster().startMasterOnlyNode(Settings.EMPTY);
internalCluster().startDataOnlyNodes(2);
Expand All @@ -275,7 +274,10 @@ public void testForceStaleReplicaToBePromotedToPrimaryOnWrongNode() throws Excep
.put("index.number_of_replicas", 1)).get());
ensureGreen();
createStaleReplicaScenario(master);
internalCluster().startDataOnlyNodes(2);
// Ensure the stopped primary's data is deleted so that it doesn't get picked up by the next datanode we start
internalCluster().wipePendingDataDirectories();
internalCluster().startDataOnlyNodes(1);
ensureStableCluster(3, master);
final int shardId = 0;
final List<String> nodeNames = new ArrayList<>(Arrays.asList(internalCluster().getNodeNames()));
nodeNames.remove(master);
Expand All @@ -292,6 +294,25 @@ public void testForceStaleReplicaToBePromotedToPrimaryOnWrongNode() throws Excep
equalTo("No data for shard [" + shardId + "] of index [" + idxName + "] found on node [" + nodeWithoutData + ']'));
}

public void testForceStaleReplicaToBePromotedForGreenIndex() {
internalCluster().startMasterOnlyNode(Settings.EMPTY);
final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
final String idxName = "test";
assertAcked(client().admin().indices().prepareCreate(idxName)
.setSettings(Settings.builder().put("index.number_of_shards", 1)
.put("index.number_of_replicas", 1)).get());
ensureGreen();
final String nodeWithoutData = randomFrom(dataNodes);
final int shardId = 0;
IllegalArgumentException iae = expectThrows(
IllegalArgumentException.class,
() -> client().admin().cluster().prepareReroute()
.add(new AllocateStalePrimaryAllocationCommand(idxName, shardId, nodeWithoutData, true)).get());
assertThat(
iae.getMessage(),
equalTo("[allocate_stale_primary] primary [" + idxName+ "][" + shardId + "] is already assigned"));
}

public void testForceStaleReplicaToBePromotedForMissingIndex() {
internalCluster().startMasterOnlyNode(Settings.EMPTY);
final String dataNode = internalCluster().startDataOnlyNode();
Expand Down
Expand Up @@ -1398,8 +1398,7 @@ private void randomlyResetClients() {
}
}

private void wipePendingDataDirectories() {
assert Thread.holdsLock(this);
public synchronized void wipePendingDataDirectories() {
if (!dataDirToClean.isEmpty()) {
try {
for (Path path : dataDirToClean) {
Expand Down

0 comments on commit 63fe3c6

Please sign in to comment.