Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Show configured and remaining delay for an unassigned shard. #17515

Merged
merged 2 commits into from Apr 7, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -26,6 +26,7 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.shard.ShardId;
Expand All @@ -47,6 +48,7 @@ public final class ClusterAllocationExplanation implements ToXContent, Writeable
private final Map<DiscoveryNode, Decision> nodeToDecision;
private final Map<DiscoveryNode, Float> nodeWeights;
private final UnassignedInfo unassignedInfo;
private final long remainingDelayNanos;

public ClusterAllocationExplanation(StreamInput in) throws IOException {
this.shard = ShardId.readShardId(in);
Expand All @@ -73,17 +75,19 @@ public ClusterAllocationExplanation(StreamInput in) throws IOException {
ntw.put(dn, weight);
}
this.nodeWeights = ntw;
remainingDelayNanos = in.readVLong();
}

public ClusterAllocationExplanation(ShardId shard, boolean primary, @Nullable String assignedNodeId,
UnassignedInfo unassignedInfo, Map<DiscoveryNode, Decision> nodeToDecision,
Map<DiscoveryNode, Float> nodeWeights) {
Map<DiscoveryNode, Float> nodeWeights, long remainingDelayNanos) {
this.shard = shard;
this.primary = primary;
this.assignedNodeId = assignedNodeId;
this.unassignedInfo = unassignedInfo;
this.nodeToDecision = nodeToDecision == null ? Collections.emptyMap() : nodeToDecision;
this.nodeWeights = nodeWeights == null ? Collections.emptyMap() : nodeWeights;
this.remainingDelayNanos = remainingDelayNanos;
}

public ShardId getShard() {
Expand Down Expand Up @@ -124,6 +128,11 @@ public Map<DiscoveryNode, Float> getNodeWeights() {
return this.nodeWeights;
}

/** Return the remaining allocation delay for this shard in nanoseconds */
public long getRemainingDelayNanos() {
return this.remainingDelayNanos;
}

public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(); {
builder.startObject("shard"); {
Expand All @@ -141,6 +150,11 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
// If we have unassigned info, show that
if (unassignedInfo != null) {
unassignedInfo.toXContent(builder, params);
long delay = unassignedInfo.getLastComputedLeftDelayNanos();
builder.field("allocation_delay", TimeValue.timeValueNanos(delay));
builder.field("allocation_delay_ms", TimeValue.timeValueNanos(delay).millis());
builder.field("remaining_delay", TimeValue.timeValueNanos(remainingDelayNanos));
builder.field("remaining_delay_ms", TimeValue.timeValueNanos(remainingDelayNanos).millis());
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not clear on why we need both allocation_delay and remaining_delay. Don't they both convey the time remaining before shard allocation kicks in?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Allocation delay is the configured time before the delayed shard is rerouted (by default, 1 minute unless configured on the per-index level), remaining delay is the time left, so for the example output:

  "allocation_delay" : "59.9s",
  "allocation_delay_ms" : 59910,
  "remaining_delay" : "38.9s",
  "remaining_delay_ms" : 38991,

Only remaining_delay will be counting down

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason I'm confused is, allocation_delay is set from a call to unassignedInfo#getLastComputedLeftDelayNanos(), and in UnassignedInfo#updateDelay, there is this code:

public long updateDelay(long nanoTimeNow, Settings settings, Settings indexSettings) {
        final long newComputedLeftDelayNanos = getRemainingDelay(nanoTimeNow, settings, indexSettings);
        lastComputedLeftDelayNanos = newComputedLeftDelayNanos;
        return newComputedLeftDelayNanos;
 }

so it seems the lastComputedLeftDelayNanos gets updated to the remaining delay?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this is only going to get updated when the master node changes (there is a lot of weird things with this regarding nanos and millis and serialization)

}
builder.startObject("nodes");
for (Map.Entry<DiscoveryNode, Float> entry : nodeWeights.entrySet()) {
Expand Down Expand Up @@ -194,5 +208,6 @@ public void writeTo(StreamOutput out) throws IOException {
entry.getKey().writeTo(out);
out.writeFloat(entry.getValue());
}
out.writeVLong(remainingDelayNanos);
}
}
Expand Up @@ -139,8 +139,14 @@ public static ClusterAllocationExplanation explainShard(ShardRouting shard, Rout
nodeToDecision.put(discoNode, d);
}
}
long remainingDelayNanos = 0;
if (ui != null) {
final MetaData metadata = allocation.metaData();
final Settings indexSettings = metadata.index(shard.index()).getSettings();
remainingDelayNanos = ui.getRemainingDelay(System.nanoTime(), metadata.settings(), indexSettings);
}
return new ClusterAllocationExplanation(shard.shardId(), shard.primary(), shard.currentNodeId(), ui, nodeToDecision,
shardAllocator.weighShard(allocation, shard));
shardAllocator.weighShard(allocation, shard), remainingDelayNanos);
}

@Override
Expand Down
Expand Up @@ -232,19 +232,27 @@ public long getLastComputedLeftDelayNanos() {
}

/**
* Updates delay left based on current time (in nanoseconds) and index/node settings.
* Calculates the delay left based on current time (in nanoseconds) and index/node settings.
*
* @return updated delay in nanoseconds
* @return calculated delay in nanoseconds
*/
public long updateDelay(long nanoTimeNow, Settings settings, Settings indexSettings) {
long delayTimeoutNanos = getAllocationDelayTimeoutSettingNanos(settings, indexSettings);
final long newComputedLeftDelayNanos;
public long getRemainingDelay(final long nanoTimeNow, final Settings settings, final Settings indexSettings) {
final long delayTimeoutNanos = getAllocationDelayTimeoutSettingNanos(settings, indexSettings);
if (delayTimeoutNanos == 0L) {
newComputedLeftDelayNanos = 0L;
return 0L;
} else {
assert nanoTimeNow >= unassignedTimeNanos;
newComputedLeftDelayNanos = Math.max(0L, delayTimeoutNanos - (nanoTimeNow - unassignedTimeNanos));
return Math.max(0L, delayTimeoutNanos - (nanoTimeNow - unassignedTimeNanos));
}
}

/**
* Updates delay left based on current time (in nanoseconds) and index/node settings.
*
* @return updated delay in nanoseconds
*/
public long updateDelay(final long nanoTimeNow, final Settings settings, final Settings indexSettings) {
final long newComputedLeftDelayNanos = getRemainingDelay(nanoTimeNow, settings, indexSettings);
lastComputedLeftDelayNanos = newComputedLeftDelayNanos;
return newComputedLeftDelayNanos;
}
Expand Down
@@ -0,0 +1,73 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.action.admin.cluster.allocation;

import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.ESSingleNodeTestCase;

import java.util.List;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;

/**
* Tests for the cluster allocation explanation
*/
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
public final class ClusterAllocationExplainIT extends ESIntegTestCase {
public void testDelayShards() throws Exception {
logger.info("--> starting 3 nodes");
List<String> nodes = internalCluster().startNodesAsync(3).get();

// Wait for all 3 nodes to be up
logger.info("--> waiting for 3 nodes to be up");
assertBusy(new Runnable() {
@Override
public void run() {
NodesStatsResponse resp = client().admin().cluster().prepareNodesStats().get();
assertThat(resp.getNodes().length, equalTo(3));
}
});
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, instead of the above to wait for the nodes to come up, could something like this be done?

client().admin().cluster().health(Requests.clusterHealthRequest().waitForNodes(Integer.toString(3))).actionGet();


logger.info("--> creating 'test' index");
prepareCreate("test").setSettings(Settings.settingsBuilder()
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "1m")
.put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 5)
.put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1)).get();
ensureGreen("test");

logger.info("--> stopping a random node");
assertTrue(internalCluster().stopRandomDataNode());

ensureYellow("test");

ClusterAllocationExplainResponse resp = client().admin().cluster().prepareAllocationExplain().useAnyUnassignedShard().get();
ClusterAllocationExplanation cae = resp.getExplanation();
assertThat(cae.getShard().getIndexName(), equalTo("test"));
assertFalse(cae.isPrimary());
assertFalse(cae.isAssigned());
assertThat("expecting a remaining delay, got: " + cae.getRemainingDelayNanos(), cae.getRemainingDelayNanos(), greaterThan(0L));
}
}
Expand Up @@ -64,9 +64,10 @@ public void testExplanationSerialization() throws Exception {
nodeToDecisions.put(dn, d);
nodeToWeight.put(dn, randomFloat());
}


long remainingDelay = randomIntBetween(0, 500);
ClusterAllocationExplanation cae = new ClusterAllocationExplanation(shard, true, "assignedNode", null,
nodeToDecisions, nodeToWeight);
nodeToDecisions, nodeToWeight, remainingDelay);
BytesStreamOutput out = new BytesStreamOutput();
cae.writeTo(out);
StreamInput in = StreamInput.wrap(out.bytes());
Expand All @@ -80,5 +81,6 @@ public void testExplanationSerialization() throws Exception {
assertEquals(nodeToDecisions.get(entry.getKey()), entry.getValue());
}
assertEquals(nodeToWeight, cae2.getNodeWeights());
assertEquals(remainingDelay, cae2.getRemainingDelayNanos());
}
}