cortexlabs · vishalbollu · Mar 25, 2020 · Mar 24, 2020
diff --git a/docs/deployments/api-configuration.md b/docs/deployments/api-configuration.md
@@ -34,11 +34,11 @@ Reference the section below which corresponds to your Predictor type: [Python](#
     max_replica_concurrency: <int>  # the maximum number of in-flight requests per replica before requests are rejected with error code 503 (default: 1024)
     window: <duration>  # the time over which to average the API's concurrency (default: 60s)
     downscale_stabilization_period: <duration>  # the API will not scale below the highest recommendation made during this period (default: 5m)
-    upscale_stabilization_period: <duration>  # the API will not scale above the lowest recommendation made during this period (default: 0m)
-    max_downscale_factor: <float>  # the maximum factor by which to scale down the API on a single scaling event (default: 0.5)
-    max_upscale_factor: <float>  # the maximum factor by which to scale up the API on a single scaling event (default: 10)
-    downscale_tolerance: <float>  # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.1)
-    upscale_tolerance: <float>  # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.1)
+    upscale_stabilization_period: <duration>  # the API will not scale above the lowest recommendation made during this period (default: 1m)
+    max_downscale_factor: <float>  # the maximum factor by which to scale down the API on a single scaling event (default: 0.75)
+    max_upscale_factor: <float>  # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
+    downscale_tolerance: <float>  # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
+    upscale_tolerance: <float>  # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
   update_strategy:
     max_surge: <string | int>  # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
     max_unavailable: <string | int>  # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
@@ -76,11 +76,11 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     max_replica_concurrency: <int>  # the maximum number of in-flight requests per replica before requests are rejected with error code 503 (default: 1024)
     window: <duration>  # the time over which to average the API's concurrency (default: 60s)
     downscale_stabilization_period: <duration>  # the API will not scale below the highest recommendation made during this period (default: 5m)
-    upscale_stabilization_period: <duration>  # the API will not scale above the lowest recommendation made during this period (default: 0m)
-    max_downscale_factor: <float>  # the maximum factor by which to scale down the API on a single scaling event (default: 0.5)
-    max_upscale_factor: <float>  # the maximum factor by which to scale up the API on a single scaling event (default: 10)
-    downscale_tolerance: <float>  # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.1)
-    upscale_tolerance: <float>  # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.1)
+    upscale_stabilization_period: <duration>  # the API will not scale above the lowest recommendation made during this period (default: 1m)
+    max_downscale_factor: <float>  # the maximum factor by which to scale down the API on a single scaling event (default: 0.75)
+    max_upscale_factor: <float>  # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
+    downscale_tolerance: <float>  # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
+    upscale_tolerance: <float>  # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
   update_strategy:
     max_surge: <string | int>  # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
     max_unavailable: <string | int>  # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
@@ -117,11 +117,11 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     max_replica_concurrency: <int>  # the maximum number of in-flight requests per replica before requests are rejected with error code 503 (default: 1024)
     window: <duration>  # the time over which to average the API's concurrency (default: 60s)
     downscale_stabilization_period: <duration>  # the API will not scale below the highest recommendation made during this period (default: 5m)
-    upscale_stabilization_period: <duration>  # the API will not scale above the lowest recommendation made during this period (default: 0m)
-    max_downscale_factor: <float>  # the maximum factor by which to scale down the API on a single scaling event (default: 0.5)
-    max_upscale_factor: <float>  # the maximum factor by which to scale up the API on a single scaling event (default: 10)
-    downscale_tolerance: <float>  # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.1)
-    upscale_tolerance: <float>  # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.1)
+    upscale_stabilization_period: <duration>  # the API will not scale above the lowest recommendation made during this period (default: 1m)
+    max_downscale_factor: <float>  # the maximum factor by which to scale down the API on a single scaling event (default: 0.75)
+    max_upscale_factor: <float>  # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
+    downscale_tolerance: <float>  # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
+    upscale_tolerance: <float>  # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
   update_strategy:
     max_surge: <string | int>  # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
     max_unavailable: <string | int>  # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)

diff --git a/docs/deployments/autoscaling.md b/docs/deployments/autoscaling.md
@@ -36,15 +36,15 @@ Cortex autoscales your web services based on your configuration.
 
 * `downscale_stabilization_period` (default: 5m): The API will not scale below the highest recommendation made during this period. Every 10 seconds, the autoscaler makes a recommendation based on all of the other configuration parameters described here. It will then take the max of the current recommendation and all recommendations made during the `downscale_stabilization_period`, and use that to determine the final number of replicas to scale to. Increasing this value will cause the cluster to react more slowly to decreased traffic, and will reduce thrashing.
 
-* `upscale_stabilization_period` (default: 0m): The API will not scale above the lowest recommendation made during this period. Every 10 seconds, the autoscaler makes a recommendation based on all of the other configuration parameters described here. It will then take the min of the current recommendation and all recommendations made during the `upscale_stabilization_period`, and use that to determine the final number of replicas to scale to. Increasing this value will cause the cluster to react more slowly to increased traffic, and will reduce thrashing. The default is 0 minutes, which means that the cluster will react quickly to increased traffic.
+* `upscale_stabilization_period` (default: 1m): The API will not scale above the lowest recommendation made during this period. Every 10 seconds, the autoscaler makes a recommendation based on all of the other configuration parameters described here. It will then take the min of the current recommendation and all recommendations made during the `upscale_stabilization_period`, and use that to determine the final number of replicas to scale to. Increasing this value will cause the cluster to react more slowly to increased traffic, and will reduce thrashing. The default is 1 minute; setting it to 0m removes this delay so that the cluster reacts as quickly as possible to increased traffic.
 
-* `max_downscale_factor` (default: 0.5): The maximum factor by which to scale down the API on a single scaling event. For example, if `max_downscale_factor` is 0.5 and there are 10 running replicas, the autoscaler will not recommend fewer than 5 replicas. Increasing this number will allow the cluster to shrink more quickly in response to dramatic dips in traffic.
+* `max_downscale_factor` (default: 0.75): The maximum factor by which to scale down the API on a single scaling event. For example, if `max_downscale_factor` is 0.5 and there are 10 running replicas, the autoscaler will not recommend fewer than 5 replicas. Decreasing this number will allow the cluster to shrink more quickly in response to dramatic dips in traffic.
 
-* `max_upscale_factor` (default: 10): The maximum factor by which to scale up the API on a single scaling event. For example, if `max_upscale_factor` is 10 and there are 5 running replicas, the autoscaler will not recommend more than 50 replicas. Increasing this number will allow the cluster to grow more quickly in response to dramatic spikes in traffic.
+* `max_upscale_factor` (default: 1.5): The maximum factor by which to scale up the API on a single scaling event. For example, if `max_upscale_factor` is 10 and there are 5 running replicas, the autoscaler will not recommend more than 50 replicas. Increasing this number will allow the cluster to grow more quickly in response to dramatic spikes in traffic.
 
-* `downscale_tolerance` (default: 0.1): Any recommendation falling within this factor below the current number of replicas will not trigger a scale down event. For example, if `downscale_tolerance` is 0.1 and there are 20 running replicas, a recommendation of 18 or 19 replicas will not be acted on, and the API will remain at 20 replicas. Increasing this value will prevent thrashing, but setting it too high will prevent the cluster from maintaining it's optimal size.
+* `downscale_tolerance` (default: 0.05): Any recommendation falling within this factor below the current number of replicas will not trigger a scale down event. For example, if `downscale_tolerance` is 0.1 and there are 20 running replicas, a recommendation of 18 or 19 replicas will not be acted on, and the API will remain at 20 replicas. Increasing this value will prevent thrashing, but setting it too high will prevent the cluster from maintaining its optimal size.
 
-* `upscale_tolerance` (default: 0.1): Any recommendation falling within this factor above the current number of replicas will not trigger a scale up event. For example, if `upscale_tolerance` is 0.1 and there are 20 running replicas, a recommendation of 21 or 22 replicas will not be acted on, and the API will remain at 20 replicas. Increasing this value will prevent thrashing, but setting it too high will prevent the cluster from maintaining it's optimal size.
+* `upscale_tolerance` (default: 0.05): Any recommendation falling within this factor above the current number of replicas will not trigger a scale up event. For example, if `upscale_tolerance` is 0.1 and there are 20 running replicas, a recommendation of 21 or 22 replicas will not be acted on, and the API will remain at 20 replicas. Increasing this value will prevent thrashing, but setting it too high will prevent the cluster from maintaining its optimal size.
 
 ## Autoscaling Nodes
 

diff --git a/pkg/operator/operator/validations.go b/pkg/operator/operator/validations.go
@@ -255,7 +255,7 @@ var _autoscalingValidation = &cr.StructFieldValidation{
 			{
 				StructField: "UpscaleStabilizationPeriod",
 				StringValidation: &cr.StringValidation{
-					Default: "0s",
+					Default: "1m",
 				},
 				Parser: cr.DurationParser(&cr.DurationValidation{
 					GreaterThanOrEqualTo: pointer.Duration(libtime.MustParseDuration("0s")),
@@ -264,30 +264,30 @@ var _autoscalingValidation = &cr.StructFieldValidation{
 			{
 				StructField: "MaxDownscaleFactor",
 				Float64Validation: &cr.Float64Validation{
-					Default:              0.5,
+					Default:              0.75,
 					GreaterThanOrEqualTo: pointer.Float64(0),
 					LessThan:             pointer.Float64(1),
 				},
 			},
 			{
 				StructField: "MaxUpscaleFactor",
 				Float64Validation: &cr.Float64Validation{
-					Default:     10,
+					Default:     1.5,
 					GreaterThan: pointer.Float64(1),
 				},
 			},
 			{
 				StructField: "DownscaleTolerance",
 				Float64Validation: &cr.Float64Validation{
-					Default:              0.1,
+					Default:              0.05,
 					GreaterThanOrEqualTo: pointer.Float64(0),
 					LessThan:             pointer.Float64(1),
 				},
 			},
 			{
 				StructField: "UpscaleTolerance",
 				Float64Validation: &cr.Float64Validation{
-					Default:              0.1,
+					Default:              0.05,
 					GreaterThanOrEqualTo: pointer.Float64(0),
 				},
 			},