apache · gianm · Mar 9, 2019 · Feb 27, 2019 · Feb 28, 2019 · Feb 28, 2019
diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md
@@ -783,8 +783,8 @@ A sample Coordinator dynamic config JSON object is shown below:
   "replicationThrottleLimit": 10,
   "emitBalancingStats": false,
   "killDataSourceWhitelist": ["wikipedia", "testDatasource"],
-  "historicalNodesInMaintenance": ["localhost:8182", "localhost:8282"],
-  "nodesInMaintenancePriority": 7
+  "decommissioningNodes": ["localhost:8182", "localhost:8282"],
+  "decommissioningVelocity": 7
 }
 ```
 
@@ -803,9 +803,9 @@ Issuing a GET request at the same URL will return the spec that is currently in
 |`killDataSourceWhitelist`|List of dataSources for which kill tasks are sent if property `druid.coordinator.kill.on` is true. This can be a list of comma-separated dataSources or a JSON array.|none|
 |`killAllDataSources`|Send kill tasks for ALL dataSources if property `druid.coordinator.kill.on` is true. If this is set to true then `killDataSourceWhitelist` must not be specified or be empty list.|false|
 |`killPendingSegmentsSkipList`|List of dataSources for which pendingSegments are _NOT_ cleaned up if property `druid.coordinator.kill.pendingSegments.on` is true. This can be a list of comma-separated dataSources or a JSON array.|none|
-|`maxSegmentsInNodeLoadingQueue`|The maximum number of segments that could be queued for loading to any given server. This parameter could be used to speed up segments loading process, especially if there are "slow" processes in the cluster (with low loading speed) or if too much segments scheduled to be replicated to some particular node (faster loading could be preferred to better segments distribution). Desired value depends on segments loading speed, acceptable replication time and number of processes. Value 1000 could be a start point for a rather big cluster. Default value is 0 (loading queue is unbounded) |0|
-|`historicalNodesInMaintenance`| List of Historical nodes in maintenance mode. Coordinator doesn't assign new segments on those nodes and moves segments from the nodes according to a specified priority.|none|
-|`nodesInMaintenancePriority`| Priority of segments from servers in maintenance. Coordinator takes ceil(maxSegmentsToMove * (priority / 10)) from servers in maitenance during balancing phase, i.e.:<br>0 - no segments from servers in maintenance will be processed during balancing<br>5 - 50% segments from servers in maintenance<br>10 - 100% segments from servers in maintenance<br>By leveraging the priority an operator can prevent general nodes from overload or decrease maitenance time instead.|7|
+|`maxSegmentsInNodeLoadingQueue`|The maximum number of segments that could be queued for loading to any given server. This parameter could be used to speed up the segment loading process, especially if there are "slow" nodes in the cluster (with low loading speed) or if too many segments are scheduled to be replicated to some particular node (faster loading could be preferred to better segment distribution). The desired value depends on segment loading speed, acceptable replication time and the number of nodes. A value of 1000 could be a starting point for a rather big cluster. Default value is 0 (loading queue is unbounded) |0|
+|`decommissioningNodes`| List of 'decommissioning' historical servers. The Coordinator doesn't assign new segments to these servers and moves segments away from the 'decommissioning' servers at the maximum rate specified by `decommissioningVelocity`.|none|
+|`decommissioningVelocity`| Decommissioning velocity determines the maximum number of segments that may be moved away from 'decommissioning' servers to non-decommissioning (that is, active) servers during one Coordinator run. This value is relative to the total maximum segment movements allowed during one run, which is determined by the `maxSegmentsToMove` configuration. Specifically, the maximum is `ceil(maxSegmentsToMove * (velocity / 10))`. For example, if `decommissioningVelocity` is 5, no more than `ceil(maxSegmentsToMove * 0.5)` segments may be moved away from 'decommissioning' servers. If `decommissioningVelocity` is 0, segments will neither be moved from _nor to_ 'decommissioning' servers, effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or assignment by load rules. Decommissioning can also become stalled if there are no available active servers on which to place the segments. By leveraging the velocity an operator can prevent active servers from being overloaded by prioritizing balancing, or decrease decommissioning time instead. The value should be between 0 and 10, inclusive.|7|
 
 To view the audit history of Coordinator dynamic config issue a GET request to the URL -
 

diff --git a/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java b/server/src/main/java/org/apache/druid/server/coordinator/CoordinatorDynamicConfig.java
@@ -56,8 +56,8 @@ public class CoordinatorDynamicConfig
   private final boolean emitBalancingStats;
   private final boolean killAllDataSources;
   private final Set<String> killableDataSources;
-  private final Set<String> historicalNodesInMaintenance;
-  private final int nodesInMaintenancePriority;
+  private final Set<String> decommissioningNodes;
+  private final int decommissioningVelocity;
 
   // The pending segments of the dataSources in this list are not killed.
   private final Set<String> protectedPendingSegmentDatasources;
@@ -88,8 +88,8 @@ public CoordinatorDynamicConfig(
       @JsonProperty("killAllDataSources") boolean killAllDataSources,
       @JsonProperty("killPendingSegmentsSkipList") Object protectedPendingSegmentDatasources,
       @JsonProperty("maxSegmentsInNodeLoadingQueue") int maxSegmentsInNodeLoadingQueue,
-      @JsonProperty("historicalNodesInMaintenance") Object historicalNodesInMaintenance,
-      @JsonProperty("nodesInMaintenancePriority") int nodesInMaintenancePriority
+      @JsonProperty("decommissioningNodes") Object decommissioningNodes,
+      @JsonProperty("decommissioningVelocity") int decommissioningVelocity
   )
   {
     this.millisToWaitBeforeDeleting = millisToWaitBeforeDeleting;
@@ -104,12 +104,12 @@ public CoordinatorDynamicConfig(
     this.killableDataSources = parseJsonStringOrArray(killableDataSources);
     this.protectedPendingSegmentDatasources = parseJsonStringOrArray(protectedPendingSegmentDatasources);
     this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue;
-    this.historicalNodesInMaintenance = parseJsonStringOrArray(historicalNodesInMaintenance);
+    this.decommissioningNodes = parseJsonStringOrArray(decommissioningNodes);
     Preconditions.checkArgument(
-        nodesInMaintenancePriority >= 0 && nodesInMaintenancePriority <= 10,
-        "nodesInMaintenancePriority should be in range [0, 10]"
+        decommissioningVelocity >= 0 && decommissioningVelocity <= 10,
+        "decommissioningVelocity should be in range [0, 10]"
     );
-    this.nodesInMaintenancePriority = nodesInMaintenancePriority;
+    this.decommissioningVelocity = decommissioningVelocity;
 
     if (this.killAllDataSources && !this.killableDataSources.isEmpty()) {
       throw new IAE("can't have killAllDataSources and non-empty killDataSourceWhitelist");
@@ -231,32 +231,39 @@ public int getMaxSegmentsInNodeLoadingQueue()
   }
 
   /**
-   * Historical nodes list in maintenance mode. Coordinator doesn't assign new segments on those nodes and moves
-   * segments from those nodes according to a specified priority.
+   * List of historical nodes to 'decommission'. Coordinator doesn't assign new segments to those nodes and moves
+   * segments away from the 'decommissioning' servers at the maximum rate specified by
+   * {@link CoordinatorDynamicConfig#getDecommissioningVelocity}.
    *
    * @return list of host:port entries
    */
   @JsonProperty
-  public Set<String> getHistoricalNodesInMaintenance()
+  public Set<String> getDecommissioningNodes()
   {
-    return historicalNodesInMaintenance;
+    return decommissioningNodes;
   }
 
   /**
-   * Priority of segments from servers in maintenance. Coordinator takes ceil(maxSegmentsToMove * (priority / 10))
-   * from servers in maitenance during balancing phase, i.e.:
-   * 0 - no segments from servers in maintenance will be processed during balancing
-   * 5 - 50% segments from servers in maintenance
-   * 10 - 100% segments from servers in maintenance
-   * By leveraging the priority an operator can prevent general nodes from overload or decrease maitenance time
-   * instead.
+   * Decommissioning velocity determines the maximum number of segments that may be moved away from 'decommissioning'
+   * servers to non-decommissioning (that is, active) servers during one Coordinator's run. This value is relative to
+   * the total maximum segment movements allowed during one run which is determined by
+   * {@link CoordinatorDynamicConfig#getMaxSegmentsToMove()}.
+   *
+   * Specifically, the maximum is `ceil(maxSegmentsToMove * (velocity / 10))`. For example, if `decommissioningVelocity`
+   * is 5, no more than `ceil(maxSegmentsToMove * 0.5)` segments may be moved away from 'decommissioning' servers.
+   *
+   * If `decommissioningVelocity` is 0, segments will neither be moved from _nor to_ 'decommissioning' servers,
+   * effectively putting them in a sort of 'maintenance' mode that will not participate in balancing or assignment by
+   * load rules. Decommissioning can also become stalled if there are no available active servers to place the segments.
+   * By leveraging the velocity an operator can prevent active servers from overload by prioritizing balancing, or
+   * decrease decommissioning time instead. The value should be between 0 and 10.
    *
    * @return number in range [0, 10]
    */
   @JsonProperty
-  public int getNodesInMaintenancePriority()
+  public int getDecommissioningVelocity()
   {
-    return nodesInMaintenancePriority;
+    return decommissioningVelocity;
   }
 
   @Override
@@ -275,8 +282,8 @@ public String toString()
            ", killDataSourceWhitelist=" + killableDataSources +
            ", protectedPendingSegmentDatasources=" + protectedPendingSegmentDatasources +
            ", maxSegmentsInNodeLoadingQueue=" + maxSegmentsInNodeLoadingQueue +
-           ", historicalNodesInMaintenance=" + historicalNodesInMaintenance +
-           ", nodesInMaintenancePriority=" + nodesInMaintenancePriority +
+           ", decommissioningNodes=" + decommissioningNodes +
+           ", decommissioningVelocity=" + decommissioningVelocity +
            '}';
   }
 
@@ -328,10 +335,10 @@ public boolean equals(Object o)
     if (!Objects.equals(protectedPendingSegmentDatasources, that.protectedPendingSegmentDatasources)) {
       return false;
     }
-    if (!Objects.equals(historicalNodesInMaintenance, that.historicalNodesInMaintenance)) {
+    if (!Objects.equals(decommissioningNodes, that.decommissioningNodes)) {
       return false;
     }
-    return nodesInMaintenancePriority == that.nodesInMaintenancePriority;
+    return decommissioningVelocity == that.decommissioningVelocity;
   }
 
   @Override
@@ -350,8 +357,8 @@ public int hashCode()
         maxSegmentsInNodeLoadingQueue,
         killableDataSources,
         protectedPendingSegmentDatasources,
-        historicalNodesInMaintenance,
-        nodesInMaintenancePriority
+        decommissioningNodes,
+        decommissioningVelocity
     );
   }
 
@@ -372,7 +379,7 @@ public static class Builder
     private static final boolean DEFAULT_EMIT_BALANCING_STATS = false;
     private static final boolean DEFAULT_KILL_ALL_DATA_SOURCES = false;
     private static final int DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE = 0;
-    private static final int DEFAULT_MAINTENANCE_MODE_SEGMENTS_PRIORITY = 7;
+    private static final int DEFAULT_DECOMMISSIONING_VELOCITY = 7;
 
     private Long millisToWaitBeforeDeleting;
     private Long mergeBytesLimit;
@@ -386,8 +393,8 @@ public static class Builder
     private Boolean killAllDataSources;
     private Object killPendingSegmentsSkipList;
     private Integer maxSegmentsInNodeLoadingQueue;
-    private Object maintenanceList;
-    private Integer maintenanceModeSegmentsPriority;
+    private Object decommissioningNodes;
+    private Integer decommissioningVelocity;
 
     public Builder()
     {
@@ -407,8 +414,8 @@ public Builder(
         @JsonProperty("killAllDataSources") @Nullable Boolean killAllDataSources,
         @JsonProperty("killPendingSegmentsSkipList") @Nullable Object killPendingSegmentsSkipList,
         @JsonProperty("maxSegmentsInNodeLoadingQueue") @Nullable Integer maxSegmentsInNodeLoadingQueue,
-        @JsonProperty("historicalNodesInMaintenance") @Nullable Object maintenanceList,
-        @JsonProperty("nodesInMaintenancePriority") @Nullable Integer maintenanceModeSegmentsPriority
+        @JsonProperty("decommissioningNodes") @Nullable Object decommissioningNodes,
+        @JsonProperty("decommissioningVelocity") @Nullable Integer decommissioningVelocity
     )
     {
       this.millisToWaitBeforeDeleting = millisToWaitBeforeDeleting;
@@ -423,8 +430,8 @@ public Builder(
       this.killableDataSources = killableDataSources;
       this.killPendingSegmentsSkipList = killPendingSegmentsSkipList;
       this.maxSegmentsInNodeLoadingQueue = maxSegmentsInNodeLoadingQueue;
-      this.maintenanceList = maintenanceList;
-      this.maintenanceModeSegmentsPriority = maintenanceModeSegmentsPriority;
+      this.decommissioningNodes = decommissioningNodes;
+      this.decommissioningVelocity = decommissioningVelocity;
     }
 
     public Builder withMillisToWaitBeforeDeleting(long millisToWaitBeforeDeleting)
@@ -493,15 +500,15 @@ public Builder withMaxSegmentsInNodeLoadingQueue(int maxSegmentsInNodeLoadingQue
       return this;
     }
 
-    public Builder withMaintenanceList(Set<String> list)
+    public Builder withDecommissioningNodes(Set<String> decommissioning)
     {
-      this.maintenanceList = list;
+      this.decommissioningNodes = decommissioning;
       return this;
     }
 
-    public Builder withMaintenanceModeSegmentsPriority(Integer priority)
+    public Builder withDecommissioningVelocity(Integer velocity)
     {
-      this.maintenanceModeSegmentsPriority = priority;
+      this.decommissioningVelocity = velocity;
       return this;
     }
 
@@ -522,10 +529,10 @@ public CoordinatorDynamicConfig build()
           maxSegmentsInNodeLoadingQueue == null
           ? DEFAULT_MAX_SEGMENTS_IN_NODE_LOADING_QUEUE
           : maxSegmentsInNodeLoadingQueue,
-          maintenanceList,
-          maintenanceModeSegmentsPriority == null
-          ? DEFAULT_MAINTENANCE_MODE_SEGMENTS_PRIORITY
-          : maintenanceModeSegmentsPriority
+          decommissioningNodes,
+          decommissioningVelocity == null
+          ? DEFAULT_DECOMMISSIONING_VELOCITY
+          : decommissioningVelocity
       );
     }
 
@@ -548,10 +555,10 @@ public CoordinatorDynamicConfig build(CoordinatorDynamicConfig defaults)
           maxSegmentsInNodeLoadingQueue == null
           ? defaults.getMaxSegmentsInNodeLoadingQueue()
           : maxSegmentsInNodeLoadingQueue,
-          maintenanceList == null ? defaults.getHistoricalNodesInMaintenance() : maintenanceList,
-          maintenanceModeSegmentsPriority == null
-          ? defaults.getNodesInMaintenancePriority()
-          : maintenanceModeSegmentsPriority
+          decommissioningNodes == null ? defaults.getDecommissioningNodes() : decommissioningNodes,
+          decommissioningVelocity == null
+          ? defaults.getDecommissioningVelocity()
+          : decommissioningVelocity
       );
     }
   }

diff --git a/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java b/server/src/main/java/org/apache/druid/server/coordinator/DruidCoordinator.java
@@ -694,7 +694,7 @@ public CoordinatorHistoricalManagerRunnable(final int startingLeaderCounter)
                 }
 
                 // Find all historical servers, group them by subType and sort by ascending usage
-                Set<String> nodesInMaintenance = params.getCoordinatorDynamicConfig().getHistoricalNodesInMaintenance();
+                Set<String> decommissioningServers = params.getCoordinatorDynamicConfig().getDecommissioningNodes();
                 final DruidCluster cluster = new DruidCluster();
                 for (ImmutableDruidServer server : servers) {
                   if (!loadManagementPeons.containsKey(server.getName())) {
@@ -709,7 +709,7 @@ public CoordinatorHistoricalManagerRunnable(final int startingLeaderCounter)
                       new ServerHolder(
                           server,
                           loadManagementPeons.get(server.getName()),
-                          nodesInMaintenance.contains(server.getHost())
+                          decommissioningServers.contains(server.getHost())
                       )
                   );
                 }

diff --git a/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java b/server/src/main/java/org/apache/druid/server/coordinator/ServerHolder.java
@@ -32,18 +32,18 @@ public class ServerHolder implements Comparable<ServerHolder>
   private static final Logger log = new Logger(ServerHolder.class);
   private final ImmutableDruidServer server;
   private final LoadQueuePeon peon;
-  private final boolean inMaintenance;
+  private final boolean isDecommissioning;
 
   public ServerHolder(ImmutableDruidServer server, LoadQueuePeon peon)
   {
     this(server, peon, false);
   }
 
-  public ServerHolder(ImmutableDruidServer server, LoadQueuePeon peon, boolean inMaintenance)
+  public ServerHolder(ImmutableDruidServer server, LoadQueuePeon peon, boolean isDecommissioning)
   {
     this.server = server;
     this.peon = peon;
-    this.inMaintenance = inMaintenance;
+    this.isDecommissioning = isDecommissioning;
   }
 
   public ImmutableDruidServer getServer()
@@ -82,14 +82,14 @@ public double getPercentUsed()
   }
 
   /**
-   * Historical nodes can be placed in maintenance mode, which instructs Coordinator to move segments from them
-   * according to a specified priority. The mechanism allows to drain segments from nodes which are planned for
-   * replacement.
-   * @return true if the node is in maitenance mode
+   * Historical nodes can be 'decommissioned', which instructs Coordinator to move segments from them
+   * according to a specified velocity which diverts normal balancer moves for this purpose. The mechanism allows
+   * draining segments from nodes which are planned for replacement.
+   * @return true if the node is decommissioning
    */
-  public boolean isInMaintenance()
+  public boolean isDecommissioning()
   {
-    return inMaintenance;
+    return isDecommissioning;
   }
 
   public long getAvailableSize()