Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix missing historyUUID in peer recovery when rolling upgrade 5.x to 6.3 #31506

Merged
merged 4 commits into from
Jun 22, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.http.entity.StringEntity;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.Version;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.Booleans;
Expand Down Expand Up @@ -788,6 +789,45 @@ public void testRecovery() throws Exception {
}
}

/**
* Tests that a synced-flushed index is correctly recovered.
* This might be an edge-case from 5.x to 6.x since a 5.x index commit does not have all required 6.x commit tags.
*/
public void testRecoverySealedIndex() throws Exception {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

testRecoverSyncedFlushIndex

int count;
if (runningAgainstOldCluster) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe fold this if-else into the next one.

count = randomInt(10);
} else {
count = countOfIndexedRandomDocuments();
}
if (runningAgainstOldCluster) {
Settings.Builder settings = Settings.builder()
.put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1)
.put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1)
// if the node with the replica is the first to be restarted, while a replica is still recovering
// then delayed allocation will kick in. When the node comes back, the master will search for a copy
// but the recovering copy will be seen as invalid and the cluster health won't return to GREEN
// before timing out
.put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "100ms")
.put(SETTING_ALLOCATION_MAX_RETRY.getKey(), "0"); // fail faster
createIndex(index, settings.build());
indexRandomDocuments(count, randomBoolean(), randomBoolean(),
n -> JsonXContent.contentBuilder().startObject().field("key", "value").endObject());
assertBusy(() -> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we replace this assertBusy by an ensureGreen(index)?

Response resp = client().performRequest(new Request("POST", index + "/_flush/synced"));
assertOK(resp);
Map<String, Object> result = ObjectPath.createFromResponse(resp).evaluate("_shards");
assertThat(result.get("successful"), equalTo(2));
});
}
ensureGreen(index);
refresh();
Request countRequest = new Request("GET", "/" + index + "/_search");
countRequest.addParameter("size", "0");
String countResponse = toStr(client().performRequest(countRequest));
assertThat(countResponse, containsString("\"total\":" + count));
}

/**
* Tests snapshot/restore by creating a snapshot and restoring it. It takes
* a snapshot on the old cluster and restores it on the old cluster as a
Expand Down Expand Up @@ -892,6 +932,7 @@ public void testHistoryUUIDIsAdded() throws Exception {
mappingsAndSettings.endObject();
client().performRequest("PUT", "/" + index, Collections.emptyMap(),
new StringEntity(Strings.toString(mappingsAndSettings), ContentType.APPLICATION_JSON));

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove extra line

} else {
Response response = client().performRequest("GET", index + "/_stats", singletonMap("level", "shards"));
List<Object> shardStats = ObjectPath.createFromResponse(response).evaluate("indices." + index + ".shards.0");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.http.entity.StringEntity;
import org.elasticsearch.Version;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
Expand Down Expand Up @@ -285,4 +286,27 @@ public void testSearchGeoPoints() throws Exception {
}
}

public void testRecoverySealedIndex() throws Exception {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

testRecoverSyncedFlushIndex

final String index = "recovery_a_sealed_index";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

synced_flush_index

if (CLUSTER_TYPE == ClusterType.OLD) {
Settings.Builder settings = Settings.builder()
.put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1)
.put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1)
// if the node with the replica is the first to be restarted, while a replica is still recovering
// then delayed allocation will kick in. When the node comes back, the master will search for a copy
// but the recovering copy will be seen as invalid and the cluster health won't return to GREEN
// before timing out
.put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "100ms")
.put(SETTING_ALLOCATION_MAX_RETRY.getKey(), "0"); // fail faster
createIndex(index, settings.build());
indexDocs(index, 0, randomInt(5));
assertBusy(() -> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment as above, replace by ensureGreen()?

Response resp = client().performRequest(new Request("POST", index + "/_flush/synced"));
assertOK(resp);
Map<String, Object> result = ObjectPath.createFromResponse(resp).evaluate("_shards");
assertThat(result.get("successful"), equalTo(2));
});
}
ensureGreen(index);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,9 @@ private void ensureRefCount() {

@Override
public void prepareForTranslogOperations(boolean fileBasedRecovery, int totalTranslogOps) throws IOException {
if (fileBasedRecovery && indexShard.indexSettings().getIndexVersionCreated().before(Version.V_6_0_0)) {
store.ensureIndexHas6xCommitTags();
}
state().getTranslog().totalOperations(totalTranslogOps);
indexShard().openEngineAndSkipTranslogRecovery();
}
Expand Down Expand Up @@ -438,9 +441,6 @@ public void cleanFiles(int totalTranslogOps, Store.MetadataSnapshot sourceMetaDa
store.incRef();
try {
store.cleanupAndVerify("recovery CleanFilesRequestHandler", sourceMetaData);
if (indexShard.indexSettings().getIndexVersionCreated().before(Version.V_6_0_0_rc1)) {
store.ensureIndexHas6xCommitTags();
}
// TODO: Assign the global checkpoint to the max_seqno of the safe commit if the index version >= 6.2
final String translogUUID = Translog.createEmptyTranslog(
indexShard.shardPath().resolveTranslog(), SequenceNumbers.UNASSIGNED_SEQ_NO, shardId, indexShard.getPrimaryTerm());
Expand Down