diff --git a/api-docs/influxdb3/cloud-dedicated/v1-compatibility/swaggerV1Compat.yml b/api-docs/influxdb3/cloud-dedicated/v1-compatibility/swaggerV1Compat.yml index 128021d194..1168d8b375 100644 --- a/api-docs/influxdb3/cloud-dedicated/v1-compatibility/swaggerV1Compat.yml +++ b/api-docs/influxdb3/cloud-dedicated/v1-compatibility/swaggerV1Compat.yml @@ -130,21 +130,9 @@ paths: schema: $ref: '#/components/schemas/LineProtocolLengthError' '429': - description: Token is temporarily over quota. The Retry-After header describes when to try the write again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - type: integer - format: int32 + description: Token is temporarily over quota or ingesters are resource constrained. '503': - description: Server is temporarily unavailable to accept writes. The Retry-After header describes when to try the write again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - type: integer - format: int32 + description: Server is temporarily unavailable to accept writes due to too many concurrent requests or insufficient healthy ingesters. default: description: Internal server error content: @@ -293,13 +281,7 @@ paths: type: string format: binary '429': - description: Token is temporarily over quota. The Retry-After header describes when to try the read again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - type: integer - format: int32 + description: Token is temporarily over quota or the querier is resource constrained. default: description: Error processing query content: @@ -479,13 +461,7 @@ paths: type: string format: binary '429': - description: Token is temporarily over quota. The Retry-After header describes when to try the read again. 
- headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - type: integer - format: int32 + description: Token is temporarily over quota or queriers are resource constrained. default: description: Error processing query content: diff --git a/api-docs/influxdb3/cloud-dedicated/v2/ref.yml b/api-docs/influxdb3/cloud-dedicated/v2/ref.yml index b638df94f1..f4b3e76fe5 100644 --- a/api-docs/influxdb3/cloud-dedicated/v2/ref.yml +++ b/api-docs/influxdb3/cloud-dedicated/v2/ref.yml @@ -423,15 +423,8 @@ paths: description: | Service unavailable. - - Returns this error if - the server is temporarily unavailable to accept writes. - - Returns a `Retry-After` header that describes when to try the write again. - headers: - Retry-After: - description: Non-negative decimal integer indicating seconds to wait before retrying the request. - schema: - format: int32 - type: integer + - Returns this error if the server is temporarily unavailable to accept writes due to concurrent request limits or insufficient healthy ingesters. + default: $ref: '#/components/responses/GeneralServerError' summary: Write data @@ -562,18 +555,10 @@ paths: type: string '429': description: | - #### InfluxDB Cloud: - - returns this error if a **read** or **write** request exceeds your - plan's [adjustable service quotas](/influxdb3/cloud-dedicated/account-management/limits/#adjustable-service-quotas) - or if a **delete** request exceeds the maximum - [global limit](/influxdb3/cloud-dedicated/account-management/limits/#global-limits) - - returns `Retry-After` header that describes when to try the write again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - format: int32 - type: integer + Too many requests. 
+ + - Returns this error if a **read** or **write** request exceeds rate + limits or if queriers or ingesters are resource constrained. default: content: application/json: @@ -719,21 +704,9 @@ paths: The response body contains details about the [rejected points](/influxdb3/cloud-dedicated/write-data/troubleshoot/#troubleshoot-rejected-points). '429': - description: Token is temporarily over quota. The Retry-After header describes when to try the write again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - format: int32 - type: integer + description: Token is temporarily over quota or ingesters are resource constrained. '503': - description: Server is temporarily unavailable to accept writes. The Retry-After header describes when to try the write again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - format: int32 - type: integer + description: Server is temporarily unavailable to accept writes due to too many concurrent requests or insufficient healthy ingesters. default: content: application/json: diff --git a/api-docs/influxdb3/clustered/v1-compatibility/swaggerV1Compat.yml b/api-docs/influxdb3/clustered/v1-compatibility/swaggerV1Compat.yml index 7735c655de..6e289f1cc1 100644 --- a/api-docs/influxdb3/clustered/v1-compatibility/swaggerV1Compat.yml +++ b/api-docs/influxdb3/clustered/v1-compatibility/swaggerV1Compat.yml @@ -130,21 +130,9 @@ paths: schema: $ref: '#/components/schemas/LineProtocolLengthError' '429': - description: Token is temporarily over quota. The Retry-After header describes when to try the write again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. 
- schema: - type: integer - format: int32 + description: Token is temporarily over quota or ingesters are resource constrained. '503': - description: Server is temporarily unavailable to accept writes. The Retry-After header describes when to try the write again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - type: integer - format: int32 + description: Server is temporarily unavailable to accept writes due to too many concurrent requests or insufficient healthy ingesters. default: description: Internal server error content: @@ -274,13 +262,7 @@ paths: type: string format: binary '429': - description: Token is temporarily over quota. The Retry-After header describes when to try the read again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - type: integer - format: int32 + description: Token is temporarily over quota or the querier is resource constrained. default: description: Error processing query content: @@ -441,13 +423,7 @@ paths: type: string format: binary '429': - description: Token is temporarily over quota. The Retry-After header describes when to try the read again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - type: integer - format: int32 + description: Token is temporarily over quota or queriers are resource constrained. default: description: Error processing query content: diff --git a/api-docs/influxdb3/clustered/v2/ref.yml b/api-docs/influxdb3/clustered/v2/ref.yml index 05507ea497..a93a582f1f 100644 --- a/api-docs/influxdb3/clustered/v2/ref.yml +++ b/api-docs/influxdb3/clustered/v2/ref.yml @@ -419,27 +419,15 @@ paths: '429': description: | Too many requests. 
- headers: - Retry-After: - description: Non-negative decimal integer indicating seconds to wait before retrying the request. - schema: - format: int32 - type: integer + + - Returns this error if ingesters are resource constrained. '500': $ref: '#/components/responses/InternalServerError' '503': description: | Service unavailable. - - Returns this error if - the server is temporarily unavailable to accept writes. - - Returns a `Retry-After` header that describes when to try the write again. - headers: - Retry-After: - description: Non-negative decimal integer indicating seconds to wait before retrying the request. - schema: - format: int32 - type: integer + - Returns this error if the server is temporarily unavailable to accept writes due to concurrent request limits or insufficient healthy ingesters. default: $ref: '#/components/responses/GeneralServerError' summary: Write data @@ -570,13 +558,9 @@ paths: type: string '429': description: | - Token is temporarily over quota. The Retry-After header describes when to try the write again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - format: int32 - type: integer + Too many requests. + + - Returns this error if queriers are resource constrained. default: content: application/json: @@ -678,21 +662,9 @@ paths: $ref: '#/components/schemas/LineProtocolLengthError' description: Write has been rejected because the payload is too large. Error message returns max size supported. All data in body was rejected and not written. '429': - description: Token is temporarily over quota. The Retry-After header describes when to try the write again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - format: int32 - type: integer + description: Too many requests. 
The service may be temporarily unavailable or ingesters are resource constrained. '503': - description: Server is temporarily unavailable to accept writes. The Retry-After header describes when to try the write again. - headers: - Retry-After: - description: A non-negative decimal integer indicating the seconds to delay after the response is received. - schema: - format: int32 - type: integer + description: Server is temporarily unavailable to accept writes due to too many concurrent requests or insufficient healthy ingesters. default: content: application/json: diff --git a/content/shared/influxdb3-write-guides/troubleshoot-distributed.md b/content/shared/influxdb3-write-guides/troubleshoot-distributed.md index 802d518fd3..29a2e132c0 100644 --- a/content/shared/influxdb3-write-guides/troubleshoot-distributed.md +++ b/content/shared/influxdb3-write-guides/troubleshoot-distributed.md @@ -5,6 +5,7 @@ Learn how to avoid unexpected results and recover from errors when writing to {{ - [Troubleshoot failures](#troubleshoot-failures) - [Troubleshoot rejected points](#troubleshoot-rejected-points) - [Report write issues](#report-write-issues) +{{% show-in "cloud-dedicated,clustered" %}}- [Implement an exponential backoff strategy](#implement-an-exponential-backoff-strategy){{% /show-in %}} ## Handle write responses @@ -39,7 +40,7 @@ The `message` property of the response body may contain additional details about | `404 "Not found"` | A requested **resource type** (for example, "database"), and **resource name** | A requested resource wasn't found | | `422 "Unprocessable Entity"` | `message` contains details about the error | The data isn't allowed (for example, falls outside of the database's retention period). | | `500 "Internal server error"` | Empty | Default status for an error | -| `503 "Service unavailable"` | Empty | The server is temporarily unavailable to accept writes. The `Retry-After` header contains the number of seconds to wait before trying the write again. 
| +| `503 "Service unavailable"` | Empty | The server is temporarily unavailable or the requested service is resource constrained. [Implement an exponential backoff strategy](#implement-an-exponential-backoff-strategy). | {{% /show-in %}} {{% show-in "cloud-serverless" %}} @@ -346,3 +347,121 @@ Include the support package when contacting InfluxData support through your stan - Business context if the issue affects production systems This comprehensive information will help InfluxData engineers identify root causes and provide targeted solutions for your write issues. + +{{% show-in "cloud-dedicated,clustered" %}} +## Implement an exponential backoff strategy + +Use exponential backoff with jitter for retrying requests that return `429` or `503`. +This reduces load spikes and avoids thundering-herd problems. + +**Recommended parameters**: + +- Base delay: 1s +- Multiplier: 2 (double each retry) +- Max delay: 30s +- Max retries: 5 (increase only with care) +- Jitter: use "full jitter" (random between 0 and computed delay) + +### Exponential backoff examples + +{{< code-tabs-wrapper >}} +{{% code-tabs %}} +[cURL](#) +[Python](#) +[JavaScript](#) +{{% /code-tabs %}} +{{% code-tab-content %}} + + +```sh +base=1 +max_delay=30 +max_retries=5 + +for attempt in $(seq 0 $max_retries); do + resp_code=$(curl -s -o /dev/null -w "%{http_code}" --request POST "https://{{< influxdb/host >}}/write?db=DB" ...) 
+ if [ "$resp_code" -eq 204 ]; then + echo "Write succeeded" + break + fi + + if [ "$resp_code" -ne 429 ] && [ "$resp_code" -ne 503 ]; then + echo "Non-retryable response: $resp_code" + break + fi + + # compute exponential delay and apply full jitter + delay=$(awk -v b=$base -v a=$attempt -v m=$max_delay 'BEGIN{d=b*(2^a); if(d>m) d=m; print d}') + sleep_seconds=$(awk -v d=$delay 'BEGIN{srand(); printf "%.3f", rand()*d}') + sleep $sleep_seconds +done +``` + +{{% /code-tab-content %}} + +{{% code-tab-content %}} + + +```python +import random +import time +import requests + +base = 1.0 +max_delay = 30.0 +max_retries = 5 + +for attempt in range(max_retries + 1): + r = requests.post(url, headers=headers, data=body, timeout=10) + if r.status_code == 204: + break + if r.status_code not in (429, 503): + raise RuntimeError(f"Non-retryable: {r.status_code} {r.text}") + + # exponential backoff with full jitter + retry_delay = min(base * (2 ** attempt), max_delay) + sleep = random.random() * retry_delay # full jitter + time.sleep(sleep) +else: + raise RuntimeError("Max retries exceeded") +``` + +{{% /code-tab-content %}} + +{{% code-tab-content %}} + + +```js +const base = 1000; +const maxDelay = 30000; +const maxRetries = 5; + +async function sleep(ms) { return new Promise(r => setTimeout(r, ms)); } + +for (let attempt = 0; attempt <= maxRetries; attempt++) { + const res = await fetch(url, { method: 'POST', body }); + if (res.status === 204) break; + if (![429, 503].includes(res.status)) throw new Error(`Non-retryable ${res.status}`); + + let delay = base * 2 ** attempt; + delay = Math.min(delay, maxDelay); + + const sleepMs = Math.random() * delay; // full jitter + await sleep(sleepMs); +} +``` + +{{% /code-tab-content %}} +{{< /code-tabs-wrapper >}} + +### Exponential backoff best practices + +- Only retry requests that are idempotent or otherwise safe to repeat. +- Retry only for `429` (Too Many Requests) and `503` (Service Unavailable). 
+- Do not retry on client errors such as `400`, `401`, `404`, or `422`. +- Cap the delay with `max_delay` to avoid excessively long waits. +- Limit total retries to avoid infinite loops, and return a meaningful error when retries are exhausted. +- Log retry attempts and backoff delays for observability and debugging. +- Combine backoff with bounded concurrency to avoid overwhelming the server. + +{{% /show-in %}}