Skip to content

Failed to autoscale after upgrade 8.3.3 to 8.4.1 #89758

@wwang500

Description

@wwang500

Steps to reproduce:

  • On cloud production, deploy an 8.3.3 cluster with autoscaling ON.
  • Upgrade it to 8.4.1; you will observe that autoscaling events won't be triggered.
  • Run GET _autoscaling/capacity?error_trace; the response is shown below:
{
  "error": {
    "root_cause": [
      {
        "type": "index_not_found_exception",
        "reason": "no such index [.kibana_task_manager_8.4.1_reindex_temp]",
        "index_uuid": "f95CB6lAR3e6J5MDbFEoQg",
        "index": ".kibana_task_manager_8.4.1_reindex_temp",
        "stack_trace": """[.kibana_task_manager_8.4.1_reindex_temp/f95CB6lAR3e6J5MDbFEoQg] org.elasticsearch.index.IndexNotFoundException: no such index [.kibana_task_manager_8.4.1_reindex_temp]
	at org.elasticsearch.cluster.metadata.Metadata.getIndexSafe(Metadata.java:919)
	at org.elasticsearch.xpack.autoscaling.storage.ReactiveStorageDeciderService$AllocationState.nodeLockedSize(ReactiveStorageDeciderService.java:490)
	at org.elasticsearch.xpack.autoscaling.storage.ReactiveStorageDeciderService$AllocationState.lambda$maxNodeLockedSize$12(ReactiveStorageDeciderService.java:470)
	at java.util.stream.ReferencePipeline$5$1.accept(ReferencePipeline.java:231)
	at java.util.Iterator.forEachRemaining(Iterator.java:133)
	at java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1845)
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499)
	at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:921)
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
	at java.util.stream.LongPipeline.reduce(LongPipeline.java:503)
	at java.util.stream.LongPipeline.max(LongPipeline.java:466)
	at org.elasticsearch.xpack.autoscaling.storage.ReactiveStorageDeciderService$AllocationState.maxNodeLockedSize(ReactiveStorageDeciderService.java:470)
	at org.elasticsearch.xpack.autoscaling.storage.ReactiveStorageDeciderService.scale(ReactiveStorageDeciderService.java:145)
	at org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCalculateCapacityService.calculateForDecider(AutoscalingCalculateCapacityService.java:226)
	at org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCalculateCapacityService.lambda$calculateForPolicy$3(AutoscalingCalculateCapacityService.java:167)
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:197)
	at java.util.TreeMap$EntrySpliterator.forEachRemaining(TreeMap.java:3287)
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499)
	at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:921)
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
	at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:682)
	at org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCalculateCapacityService.calculateForPolicy(AutoscalingCalculateCapacityService.java:168)
	at org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCalculateCapacityService.lambda$calculate$2(AutoscalingCalculateCapacityService.java:124)
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:197)
	at java.util.TreeMap$EntrySpliterator.forEachRemaining(TreeMap.java:3287)
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499)
	at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:921)
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
	at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:682)
	at org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCalculateCapacityService.calculate(AutoscalingCalculateCapacityService.java:134)
	at org.elasticsearch.xpack.autoscaling.action.TransportGetAutoscalingCapacityAction.computeCapacity(TransportGetAutoscalingCapacityAction.java:103)
	at org.elasticsearch.xpack.autoscaling.action.CapacityResponseCache.lambda$singleThreadRefresh$1(CapacityResponseCache.java:88)
	at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:462)
	at org.elasticsearch.xpack.autoscaling.action.CapacityResponseCache.singleThreadRefresh(CapacityResponseCache.java:88)
	at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:710)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
	at java.lang.Thread.run(Thread.java:833)
"""
      }
    ],
    "type": "index_not_found_exception",
    "reason": "no such index [.kibana_task_manager_8.4.1_reindex_temp]",
    "index_uuid": "f95CB6lAR3e6J5MDbFEoQg",
    "index": ".kibana_task_manager_8.4.1_reindex_temp",
    "stack_trace": """[.kibana_task_manager_8.4.1_reindex_temp/f95CB6lAR3e6J5MDbFEoQg] org.elasticsearch.index.IndexNotFoundException: no such index [.kibana_task_manager_8.4.1_reindex_temp]
	at org.elasticsearch.cluster.metadata.Metadata.getIndexSafe(Metadata.java:919)
	at org.elasticsearch.xpack.autoscaling.storage.ReactiveStorageDeciderService$AllocationState.nodeLockedSize(ReactiveStorageDeciderService.java:490)
	at org.elasticsearch.xpack.autoscaling.storage.ReactiveStorageDeciderService$AllocationState.lambda$maxNodeLockedSize$12(ReactiveStorageDeciderService.java:470)
	at java.util.stream.ReferencePipeline$5$1.accept(ReferencePipeline.java:231)
	at java.util.Iterator.forEachRemaining(Iterator.java:133)
	at java.util.Spliterators$IteratorSpliterator.forEachRemaining(Spliterators.java:1845)
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499)
	at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:921)
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
	at java.util.stream.LongPipeline.reduce(LongPipeline.java:503)
	at java.util.stream.LongPipeline.max(LongPipeline.java:466)
	at org.elasticsearch.xpack.autoscaling.storage.ReactiveStorageDeciderService$AllocationState.maxNodeLockedSize(ReactiveStorageDeciderService.java:470)
	at org.elasticsearch.xpack.autoscaling.storage.ReactiveStorageDeciderService.scale(ReactiveStorageDeciderService.java:145)
	at org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCalculateCapacityService.calculateForDecider(AutoscalingCalculateCapacityService.java:226)
	at org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCalculateCapacityService.lambda$calculateForPolicy$3(AutoscalingCalculateCapacityService.java:167)
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:197)
	at java.util.TreeMap$EntrySpliterator.forEachRemaining(TreeMap.java:3287)
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499)
	at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:921)
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
	at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:682)
	at org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCalculateCapacityService.calculateForPolicy(AutoscalingCalculateCapacityService.java:168)
	at org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCalculateCapacityService.lambda$calculate$2(AutoscalingCalculateCapacityService.java:124)
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:197)
	at java.util.TreeMap$EntrySpliterator.forEachRemaining(TreeMap.java:3287)
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:509)
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:499)
	at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:921)
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
	at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:682)
	at org.elasticsearch.xpack.autoscaling.capacity.AutoscalingCalculateCapacityService.calculate(AutoscalingCalculateCapacityService.java:134)
	at org.elasticsearch.xpack.autoscaling.action.TransportGetAutoscalingCapacityAction.computeCapacity(TransportGetAutoscalingCapacityAction.java:103)
	at org.elasticsearch.xpack.autoscaling.action.CapacityResponseCache.lambda$singleThreadRefresh$1(CapacityResponseCache.java:88)
	at org.elasticsearch.action.ActionListener.completeWith(ActionListener.java:462)
	at org.elasticsearch.xpack.autoscaling.action.CapacityResponseCache.singleThreadRefresh(CapacityResponseCache.java:88)
	at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:710)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
	at java.lang.Thread.run(Thread.java:833)
"""
  },
  "status": 404
}

The above error blocks autoscaling. In general, the ES storage autoscaling decider should not block all scaling when one index is bad. It might make sense that storage isn’t scaled in this situation, but ML should still be able to scale.

Metadata

Metadata

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions