From 740d133ec908ba02b68efe15c6b93ebd71fc118f Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 24 Mar 2020 19:54:20 -0400 Subject: [PATCH] Update autoscaling defaults --- docs/deployments/api-configuration.md | 30 +++++++++++++-------------- docs/deployments/autoscaling.md | 10 ++++----- pkg/operator/operator/validations.go | 10 ++++----- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/docs/deployments/api-configuration.md b/docs/deployments/api-configuration.md index 98e0a17f1e..f8f8ac1760 100644 --- a/docs/deployments/api-configuration.md +++ b/docs/deployments/api-configuration.md @@ -34,11 +34,11 @@ Reference the section below which corresponds to your Predictor type: [Python](# max_replica_concurrency: # the maximum number of in-flight requests per replica before requests are rejected with error code 503 (default: 1024) window: # the time over which to average the API's concurrency (default: 60s) downscale_stabilization_period: # the API will not scale below the highest recommendation made during this period (default: 5m) - upscale_stabilization_period: # the API will not scale above the lowest recommendation made during this period (default: 0m) - max_downscale_factor: # the maximum factor by which to scale down the API on a single scaling event (default: 0.5) - max_upscale_factor: # the maximum factor by which to scale up the API on a single scaling event (default: 10) - downscale_tolerance: # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.1) - upscale_tolerance: # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.1) + upscale_stabilization_period: # the API will not scale above the lowest recommendation made during this period (default: 1m) + max_downscale_factor: # the maximum factor by which to scale down the API on a single scaling event (default: 0.75) + max_upscale_factor: # 
the maximum factor by which to scale up the API on a single scaling event (default: 1.5) + downscale_tolerance: # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05) + upscale_tolerance: # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05) update_strategy: max_surge: # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) max_unavailable: # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) @@ -76,11 +76,11 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput max_replica_concurrency: # the maximum number of in-flight requests per replica before requests are rejected with error code 503 (default: 1024) window: # the time over which to average the API's concurrency (default: 60s) downscale_stabilization_period: # the API will not scale below the highest recommendation made during this period (default: 5m) - upscale_stabilization_period: # the API will not scale above the lowest recommendation made during this period (default: 0m) - max_downscale_factor: # the maximum factor by which to scale down the API on a single scaling event (default: 0.5) - max_upscale_factor: # the maximum factor by which to scale up the API on a single scaling event (default: 10) - downscale_tolerance: # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.1) - upscale_tolerance: # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.1) + upscale_stabilization_period: # the API will not scale 
above the lowest recommendation made during this period (default: 1m) + max_downscale_factor: # the maximum factor by which to scale down the API on a single scaling event (default: 0.75) + max_upscale_factor: # the maximum factor by which to scale up the API on a single scaling event (default: 1.5) + downscale_tolerance: # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05) + upscale_tolerance: # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05) update_strategy: max_surge: # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) max_unavailable: # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 
10% (default: 25%) @@ -117,11 +117,11 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput max_replica_concurrency: # the maximum number of in-flight requests per replica before requests are rejected with error code 503 (default: 1024) window: # the time over which to average the API's concurrency (default: 60s) downscale_stabilization_period: # the API will not scale below the highest recommendation made during this period (default: 5m) - upscale_stabilization_period: # the API will not scale above the lowest recommendation made during this period (default: 0m) - max_downscale_factor: # the maximum factor by which to scale down the API on a single scaling event (default: 0.5) - max_upscale_factor: # the maximum factor by which to scale up the API on a single scaling event (default: 10) - downscale_tolerance: # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.1) - upscale_tolerance: # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.1) + upscale_stabilization_period: # the API will not scale above the lowest recommendation made during this period (default: 1m) + max_downscale_factor: # the maximum factor by which to scale down the API on a single scaling event (default: 0.75) + max_upscale_factor: # the maximum factor by which to scale up the API on a single scaling event (default: 1.5) + downscale_tolerance: # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05) + upscale_tolerance: # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05) update_strategy: max_surge: # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 
5, or a percentage of desired replicas, e.g. 10% (default: 25%) max_unavailable: # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) diff --git a/docs/deployments/autoscaling.md b/docs/deployments/autoscaling.md index 44136a5816..ecf31872e3 100644 --- a/docs/deployments/autoscaling.md +++ b/docs/deployments/autoscaling.md @@ -36,15 +36,15 @@ Cortex autoscales your web services based on your configuration. * `downscale_stabilization_period` (default: 5m): The API will not scale below the highest recommendation made during this period. Every 10 seconds, the autoscaler makes a recommendation based on all of the other configuration parameters described here. It will then take the max of the current recommendation and all recommendations made during the `downscale_stabilization_period`, and use that to determine the final number of replicas to scale to. Increasing this value will cause the cluster to react more slowly to decreased traffic, and will reduce thrashing. -* `upscale_stabilization_period` (default: 0m): The API will not scale above the lowest recommendation made during this period. Every 10 seconds, the autoscaler makes a recommendation based on all of the other configuration parameters described here. It will then take the min of the current recommendation and all recommendations made during the `upscale_stabilization_period`, and use that to determine the final number of replicas to scale to. Increasing this value will cause the cluster to react more slowly to increased traffic, and will reduce thrashing. The default is 0 minutes, which means that the cluster will react quickly to increased traffic. +* `upscale_stabilization_period` (default: 1m): The API will not scale above the lowest recommendation made during this period. 
Every 10 seconds, the autoscaler makes a recommendation based on all of the other configuration parameters described here. It will then take the min of the current recommendation and all recommendations made during the `upscale_stabilization_period`, and use that to determine the final number of replicas to scale to. Increasing this value will cause the cluster to react more slowly to increased traffic, and will reduce thrashing. The default is 1 minute, which means that the cluster will wait for increased traffic to be sustained for a minute before scaling up. -* `max_downscale_factor` (default: 0.5): The maximum factor by which to scale down the API on a single scaling event. For example, if `max_downscale_factor` is 0.5 and there are 10 running replicas, the autoscaler will not recommend fewer than 5 replicas. Increasing this number will allow the cluster to shrink more quickly in response to dramatic dips in traffic. +* `max_downscale_factor` (default: 0.75): The maximum factor by which to scale down the API on a single scaling event. For example, if `max_downscale_factor` is 0.5 and there are 10 running replicas, the autoscaler will not recommend fewer than 5 replicas. Increasing this number will allow the cluster to shrink more quickly in response to dramatic dips in traffic. -* `max_upscale_factor` (default: 10): The maximum factor by which to scale up the API on a single scaling event. For example, if `max_upscale_factor` is 10 and there are 5 running replicas, the autoscaler will not recommend more than 50 replicas. Increasing this number will allow the cluster to grow more quickly in response to dramatic spikes in traffic. +* `max_upscale_factor` (default: 1.5): The maximum factor by which to scale up the API on a single scaling event. For example, if `max_upscale_factor` is 10 and there are 5 running replicas, the autoscaler will not recommend more than 50 replicas. Increasing this number will allow the cluster to grow more quickly in response to dramatic spikes in traffic. 
-* `downscale_tolerance` (default: 0.1): Any recommendation falling within this factor below the current number of replicas will not trigger a scale down event. For example, if `downscale_tolerance` is 0.1 and there are 20 running replicas, a recommendation of 18 or 19 replicas will not be acted on, and the API will remain at 20 replicas. Increasing this value will prevent thrashing, but setting it too high will prevent the cluster from maintaining it's optimal size. +* `downscale_tolerance` (default: 0.05): Any recommendation falling within this factor below the current number of replicas will not trigger a scale down event. For example, if `downscale_tolerance` is 0.1 and there are 20 running replicas, a recommendation of 18 or 19 replicas will not be acted on, and the API will remain at 20 replicas. Increasing this value will prevent thrashing, but setting it too high will prevent the cluster from maintaining its optimal size. -* `upscale_tolerance` (default: 0.1): Any recommendation falling within this factor above the current number of replicas will not trigger a scale up event. For example, if `upscale_tolerance` is 0.1 and there are 20 running replicas, a recommendation of 21 or 22 replicas will not be acted on, and the API will remain at 20 replicas. Increasing this value will prevent thrashing, but setting it too high will prevent the cluster from maintaining it's optimal size. +* `upscale_tolerance` (default: 0.05): Any recommendation falling within this factor above the current number of replicas will not trigger a scale up event. For example, if `upscale_tolerance` is 0.1 and there are 20 running replicas, a recommendation of 21 or 22 replicas will not be acted on, and the API will remain at 20 replicas. Increasing this value will prevent thrashing, but setting it too high will prevent the cluster from maintaining its optimal size. 
## Autoscaling Nodes diff --git a/pkg/operator/operator/validations.go b/pkg/operator/operator/validations.go index 77c3b0448b..985780457c 100644 --- a/pkg/operator/operator/validations.go +++ b/pkg/operator/operator/validations.go @@ -255,7 +255,7 @@ var _autoscalingValidation = &cr.StructFieldValidation{ { StructField: "UpscaleStabilizationPeriod", StringValidation: &cr.StringValidation{ - Default: "0s", + Default: "1m", }, Parser: cr.DurationParser(&cr.DurationValidation{ GreaterThanOrEqualTo: pointer.Duration(libtime.MustParseDuration("0s")), @@ -264,7 +264,7 @@ var _autoscalingValidation = &cr.StructFieldValidation{ { StructField: "MaxDownscaleFactor", Float64Validation: &cr.Float64Validation{ - Default: 0.5, + Default: 0.75, GreaterThanOrEqualTo: pointer.Float64(0), LessThan: pointer.Float64(1), }, @@ -272,14 +272,14 @@ var _autoscalingValidation = &cr.StructFieldValidation{ { StructField: "MaxUpscaleFactor", Float64Validation: &cr.Float64Validation{ - Default: 10, + Default: 1.5, GreaterThan: pointer.Float64(1), }, }, { StructField: "DownscaleTolerance", Float64Validation: &cr.Float64Validation{ - Default: 0.1, + Default: 0.05, GreaterThanOrEqualTo: pointer.Float64(0), LessThan: pointer.Float64(1), }, @@ -287,7 +287,7 @@ var _autoscalingValidation = &cr.StructFieldValidation{ { StructField: "UpscaleTolerance", Float64Validation: &cr.Float64Validation{ - Default: 0.1, + Default: 0.05, GreaterThanOrEqualTo: pointer.Float64(0), }, },