Skip to content

Commit

Permalink
When removing an AutoFollower also mark it as removed. (#37402)
Browse files Browse the repository at this point in the history
Currently when there are no more auto follow patterns for a remote cluster then
the AutoFollower instance for this remote cluster will be removed. If
a new auto follow pattern for this remote cluster gets added quickly enough
after the last delete then there may be two AutoFollower instance running
for this remote cluster instead of one.

Each AutoFollower instance stops automatically after it sees in the
start() method that there are no more auto follow patterns for the
remote cluster it is tracking. However when an auto follow pattern
gets removed and then added back quickly enough then old AutoFollower
may never detect that at some point there were no auto follow patterns
for the remote cluster it is monitoring. The creation and removal of
an AutoFollower instance happens independently in the `updateAutoFollowers()`
as part of a cluster state update.

By adding the `removed` field, an AutoFollower instance will not miss the
fact there were no auto follow patterns at some point in time. The
`updateAutoFollowers()` method now marks an AutoFollower instance as
removed when it sees that there are no more patterns for a remote cluster.
The updateAutoFollowers() method can then safely start a new AutoFollower
instance.

Relates to #36761
  • Loading branch information
martijnvg committed Jan 15, 2019
1 parent 7170ebe commit 850fbec
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 1 deletion.
Expand Up @@ -253,18 +253,33 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS
.anyMatch(pattern -> pattern.getRemoteCluster().equals(remoteCluster));
if (exist == false) {
LOGGER.info("removing auto follower for remote cluster [{}]", remoteCluster);
autoFollower.removed = true;
removedRemoteClusters.add(remoteCluster);
} else if (autoFollower.remoteClusterConnectionMissing) {
LOGGER.info("retrying auto follower [{}] after remote cluster connection was missing", remoteCluster);
autoFollower.remoteClusterConnectionMissing = false;
autoFollower.start();
}
}
assert assertNoOtherActiveAutoFollower(newAutoFollowers);
this.autoFollowers = autoFollowers
.copyAndPutAll(newAutoFollowers)
.copyAndRemoveAll(removedRemoteClusters);
}

private boolean assertNoOtherActiveAutoFollower(Map<String, AutoFollower> newAutoFollowers) {
for (AutoFollower newAutoFollower : newAutoFollowers.values()) {
AutoFollower previousInstance = autoFollowers.get(newAutoFollower.remoteCluster);
assert previousInstance == null || previousInstance.removed;
}
return true;
}


Map<String, AutoFollower> getAutoFollowers() {
return autoFollowers;
}

@Override
public void clusterChanged(ClusterChangedEvent event) {
if (event.localNodeMaster()) {
Expand All @@ -290,6 +305,7 @@ abstract static class AutoFollower {
private volatile long lastAutoFollowTimeInMillis = -1;
private volatile long metadataVersion = 0;
private volatile boolean remoteClusterConnectionMissing = false;
volatile boolean removed = false;
private volatile CountDown autoFollowPatternsCountDown;
private volatile AtomicArray<AutoFollowResult> autoFollowResults;

Expand All @@ -304,6 +320,17 @@ abstract static class AutoFollower {
}

void start() {
if (removed) {
// This check exists to avoid two AutoFollower instances a single remote cluster.
// (If an auto follow pattern is deleted and then added back quickly enough then
// the old AutoFollower instance still sees that there is an auto follow pattern
// for the remote cluster it is tracking and will continue to operate, while in
// the meantime in updateAutoFollowers() method another AutoFollower instance has been
// started for the same remote cluster.)
LOGGER.info("AutoFollower instance for cluster [{}] has been removed", remoteCluster);
return;
}

lastAutoFollowTimeInMillis = relativeTimeProvider.getAsLong();
final ClusterState clusterState = followerClusterStateSupplier.get();
final AutoFollowMetadata autoFollowMetadata = clusterState.metaData().custom(AutoFollowMetadata.TYPE);
Expand All @@ -325,6 +352,12 @@ void start() {
this.autoFollowResults = new AtomicArray<>(patterns.size());

getRemoteClusterState(remoteCluster, metadataVersion + 1, (remoteClusterStateResponse, remoteError) -> {
// Also check removed flag here, as it may take a while for this remote cluster state api call to return:
if (removed) {
LOGGER.info("AutoFollower instance for cluster [{}] has been removed", remoteCluster);
return;
}

if (remoteClusterStateResponse != null) {
assert remoteError == null;
if (remoteClusterStateResponse.isWaitForTimedOut()) {
Expand Down
Expand Up @@ -157,15 +157,20 @@ public void testAutoFollowManyIndices() throws Exception {
int expectedVal2 = numIndices;

MetaData[] metaData = new MetaData[1];
AutoFollowStats[] autoFollowStats = new AutoFollowStats[1];
try {
assertBusy(() -> {
metaData[0] = followerClient().admin().cluster().prepareState().get().getState().metaData();
autoFollowStats[0] = getAutoFollowStats();
int count = (int) Arrays.stream(metaData[0].getConcreteAllIndices()).filter(s -> s.startsWith("copy-")).count();
assertThat(count, equalTo(expectedVal2));
// Ensure that there are no auto follow errors:
// (added specifically to see that there are no leader indices auto followed multiple times)
assertThat(autoFollowStats[0].getRecentAutoFollowErrors().size(), equalTo(0));
});
} catch (AssertionError ae) {
logger.warn("metadata={}", Strings.toString(metaData[0]));
logger.warn("auto follow stats={}", Strings.toString(getAutoFollowStats()));
logger.warn("auto follow stats={}", Strings.toString(autoFollowStats[0]));
throw ae;
}
}
Expand Down
Expand Up @@ -620,6 +620,10 @@ public void testUpdateAutoFollowers() {
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(2));
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote1"), notNullValue());
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote2"), notNullValue());
// Get a reference to auto follower that will get removed, so that we can assert that it has been marked as removed,
// when pattern 1 and 3 are moved. (To avoid a edge case where multiple auto follow coordinators for the same remote cluster)
AutoFollowCoordinator.AutoFollower removedAutoFollower1 = autoFollowCoordinator.getAutoFollowers().get("remote1");
assertThat(removedAutoFollower1.removed, is(false));
// Remove patterns 1 and 3:
patterns.remove("pattern1");
patterns.remove("pattern3");
Expand All @@ -630,6 +634,7 @@ public void testUpdateAutoFollowers() {
autoFollowCoordinator.updateAutoFollowers(clusterState);
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(1));
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote2"), notNullValue());
assertThat(removedAutoFollower1.removed, is(true));
// Add pattern 4:
patterns.put("pattern4", new AutoFollowPattern("remote1", Collections.singletonList("metrics-*"), null, null, null,
null, null, null, null, null, null, null, null));
Expand All @@ -641,7 +646,13 @@ public void testUpdateAutoFollowers() {
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(2));
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote1"), notNullValue());
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote2"), notNullValue());
// Get references to auto followers that will get removed, so that we can assert that those have been marked as removed,
// when pattern 2 and 4 are moved. (To avoid a edge case where multiple auto follow coordinators for the same remote cluster)
removedAutoFollower1 = autoFollowCoordinator.getAutoFollowers().get("remote1");
AutoFollower removedAutoFollower2 = autoFollowCoordinator.getAutoFollowers().get("remote2");
// Remove patterns 2 and 4:
assertThat(removedAutoFollower1.removed, is(false));
assertThat(removedAutoFollower2.removed, is(false));
patterns.remove("pattern2");
patterns.remove("pattern4");
clusterState = ClusterState.builder(new ClusterName("remote"))
Expand All @@ -650,6 +661,8 @@ public void testUpdateAutoFollowers() {
.build();
autoFollowCoordinator.updateAutoFollowers(clusterState);
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(0));
assertThat(removedAutoFollower1.removed, is(true));
assertThat(removedAutoFollower2.removed, is(true));
}

public void testUpdateAutoFollowersNoPatterns() {
Expand Down

0 comments on commit 850fbec

Please sign in to comment.