From daeab12dfdd8e39c394c093bf85c6ccf908696b1 Mon Sep 17 00:00:00 2001
From: kodster28 <kody@cloudflare.com>
Date: Fri, 31 Jan 2025 13:21:26 -0600
Subject: [PATCH 01/13] partial

---
 src/content/changelogs/ai-gateway.yaml        |   7 +-
 .../ai-gateway/configuration/fallbacks.mdx    |  12 +-
 .../configuration/request-handling.mdx        | 104 ++++++++++++++++++
 src/content/glossary/ai-gateway.yaml          |   4 +
 4 files changed, 123 insertions(+), 4 deletions(-)
 create mode 100644 src/content/docs/ai-gateway/configuration/request-handling.mdx

diff --git a/src/content/changelogs/ai-gateway.yaml b/src/content/changelogs/ai-gateway.yaml
index 57ee466815836df..93a519c79692749 100644
--- a/src/content/changelogs/ai-gateway.yaml
+++ b/src/content/changelogs/ai-gateway.yaml
@@ -5,11 +5,16 @@ productLink: "/ai-gateway/"
 productArea: Developer platform
 productAreaLink: /workers/platform/changelog/platform/
 entries:
+  - publish_date: "2025-01-23"
+    title: Added request handling
+    description: |-
+      * Added [request handling options](/ai-gateway/request-handling/) to help manage AI provider interactions effectively, ensuring your applications remain responsive and reliable.
+
   - publish_date: "2025-01-02"
     title: DeepSeek
     description: |-
       * **Configuration**: Added [DeepSeek](/ai-gateway/providers/deepseek/) as a new provider.
-    
+
   - publish_date: "2024-12-17"
     title: AI Gateway Dashboard
     description: |-
diff --git a/src/content/docs/ai-gateway/configuration/fallbacks.mdx b/src/content/docs/ai-gateway/configuration/fallbacks.mdx
index 98fc7395ccb25fc..d79609b3155b092 100644
--- a/src/content/docs/ai-gateway/configuration/fallbacks.mdx
+++ b/src/content/docs/ai-gateway/configuration/fallbacks.mdx
@@ -9,11 +9,17 @@ import { Render } from "~/components";
 
 Specify model or provider fallbacks with your [Universal endpoint](/ai-gateway/providers/universal/) to handle request failures and ensure reliability.
 
-Fallbacks are currently triggered only when a request encounters an error. We are working to expand fallback functionality to include time-based triggers, which will allow requests that exceed a predefined response time to timeout and fallback. 
+Cloudflare can trigger your fallback provider in response to [request errors](#request-failures) or [predetermined request timeouts](/ai-gateway/configuration/request-handling/#request-timeouts). The [response header `cf-aig-step`](#response-headercf-aig-step) indicates which step successfully processed the request.
 
-## Example
+## Request failures
 
-In the following example, a request first goes to the [Workers AI](/workers-ai/) Inference API. If the request fails, it falls back to OpenAI. The response header `cf-aig-step` indicates which provider successfully processed the request. 
+By default, Cloudflare triggers your fallback if a model request returns an error.
+
+### Example
+
+In the following example, a request first goes to the [Workers AI](/workers-ai/) Inference API. If the request fails, it falls back to OpenAI.
+
+In the following example, a request first goes to the [Workers AI](/workers-ai/) Inference API. If the request fails, it falls back to OpenAI. The response header `cf-aig-step` indicates which provider successfully processed the request.
 
 1. Sends a request to Workers AI Inference API.
 2. If that request fails, proceeds to OpenAI.
diff --git a/src/content/docs/ai-gateway/configuration/request-handling.mdx b/src/content/docs/ai-gateway/configuration/request-handling.mdx
new file mode 100644
index 000000000000000..04ed77d38e5fbf8
--- /dev/null
+++ b/src/content/docs/ai-gateway/configuration/request-handling.mdx
@@ -0,0 +1,104 @@
+---
+pcx_content_type: configuration
+title: Request handling
+sidebar:
+  order: 4
+---
+
+import { Render, Aside } from "~/components";
+
+Your AI gateway supports different strategies for handling requests to providers, which allows you to manage AI interactions effectively and ensure your applications remain responsive and reliable.
+
+## Request timeouts
+
+A request timeout allows you to trigger fallbacks or a retry if a provider takes too long to respond.
+
+These timeouts help:
+
+- Improve user experience, by preventing users from waiting too long for a response
+- Proactively handle errors, by detecting unresponsive providers and triggering a fallback option
+
+Request timeouts can be set on a [Universal Endpoint](/ai-gateway/providers/universal/) or directly on a request to any [provider](/ai-gateway/providers/):
+
+- If set on a Universal Endpoint, it specifies the timeout duration for requests and triggers a fallback.
+- If set on a provider request, it specifies the timeout duration for a request and - if exceeded - returns an error.
+
+### Definitions
+
+A timeout is set in milliseconds. Additionaly, the timeout is based on when the first part of the response comes back. As long as the first part of the response returns within the specified timeframe - such as when streaming a response - your gateway will wait for the response.
+
+### Configuration
+
+#### Universal Endpoint
+
+For a Universal endpoint, configure the timeout value by setting a `requestTimeout` property at the
+
+by using one or more of the following properties, which are listed in order of priority:
+
+| Priority | Property                                                                                                               |
+| -------- | ---------------------------------------------------------------------------------------------------------------------- |
+| 1        | `requestTimeout` (added as a universal attribute)                                                                      |
+| 2        | `cf-aig-request-timeout` (header included at the [provider level](/ai-gateway/providers/universal/#payload-reference)) |
+| 3        | `cf-aig-request-timeout` (header included at the request level)                                                        |
+
+Your gateway follows this hierarchy to determine the timeout duration before implementing a fallback.
+
+### Request timeout example
+
+These request timeout values can interact to customize the behavior of your universal gateway.
+
+In this example, the request will try to answer `What is Cloudflare?` within 1000 milliseconds using the normal `@cf/meta/llama-3.1-8b-instruct` model. The `requestTimeout` property takes precedence over the `cf-aig-request-timeout` for `@cf/meta/llama-3.1-8b-instruct`.
+
+If that fails, then the gateway will timeout and move to the fallback `@cf/meta/llama-3.1-8b-instruct-fast` model. This model has 3000 milliseconds - determined by the request-level `cf-aig-request-timeout` value - to complete the request and provide an answer.
+
+```bash title="Request" collapse={36-50} {2,11,13-15}
+curl 'https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}' \
+	--header 'cf-aig-request-timeout: 3000' \
+	--header 'Content-Type: application/json' \
+	--data '[
+    {
+        "provider": "workers-ai",
+        "endpoint": "@cf/meta/llama-3.1-8b-instruct",
+        "headers": {
+            "Authorization": "Bearer {cloudflare_token}",
+            "Content-Type": "application/json",
+            "cf-aig-request-timeout": "2000"
+        },
+        "config": {
+            "requestTimeout": 1000
+        },
+        "query": {
+            "messages": [
+                {
+                    "role": "system",
+                    "content": "You are a friendly assistant"
+                },
+                {
+                    "role": "user",
+                    "content": "What is Cloduflare?"
+                }
+            ]
+        }
+    },
+    {
+        "provider": "workers-ai",
+        "endpoint": "@cf/meta/llama-3.1-8b-instruct-fast",
+        "headers": {
+            "Authorization": "Bearer {cloudflare_token}",
+            "Content-Type": "application/json"
+        },
+        "query": {
+            "messages": [
+                {
+                    "role": "system",
+                    "content": "You are a friendly assistant"
+                },
+                {
+                    "role": "user",
+                    "content": "What is Cloudflare?"
+                }
+            ]
+        }
+    }
+]'
+```
diff --git a/src/content/glossary/ai-gateway.yaml b/src/content/glossary/ai-gateway.yaml
index 4452395050cf9b9..36d378609637fd8 100644
--- a/src/content/glossary/ai-gateway.yaml
+++ b/src/content/glossary/ai-gateway.yaml
@@ -41,6 +41,10 @@ entries:
     general_definition: |-
       Header to [bypass caching for a specific request](/ai-gateway/configuration/caching/#skip-cache-cf-aig-skip-cache).
 
+  - term: cf-aig-request-timeout
+    general_definition: |-
+      Header to trigger a fallback provider based on a [predetermined response time](/ai-gateway/configuration/fallbacks/#request-timeouts) (measured in milliseconds).
+
   # Deprecated headers
   - term: cf-cache-ttl
     general_definition: |-

From df951e2eaa746d1c43f8e6a35986939e62961eb5 Mon Sep 17 00:00:00 2001
From: kodster28 <kody@cloudflare.com>
Date: Fri, 31 Jan 2025 14:00:46 -0600
Subject: [PATCH 02/13] mostly cleaned up. Still need to add more headers to
 glossary

---
 .../configuration/request-handling.mdx        | 99 ++++++++++++++-----
 src/env.d.ts                                  |  1 +
 2 files changed, 75 insertions(+), 25 deletions(-)
 create mode 100644 src/env.d.ts

diff --git a/src/content/docs/ai-gateway/configuration/request-handling.mdx b/src/content/docs/ai-gateway/configuration/request-handling.mdx
index 04ed77d38e5fbf8..27049390e2be118 100644
--- a/src/content/docs/ai-gateway/configuration/request-handling.mdx
+++ b/src/content/docs/ai-gateway/configuration/request-handling.mdx
@@ -18,10 +18,7 @@ These timeouts help:
 - Improve user experience, by preventing users from waiting too long for a response
 - Proactively handle errors, by detecting unresponsive providers and triggering a fallback option
 
-Request timeouts can be set on a [Universal Endpoint](/ai-gateway/providers/universal/) or directly on a request to any [provider](/ai-gateway/providers/):
-
-- If set on a Universal Endpoint, it specifies the timeout duration for requests and triggers a fallback.
-- If set on a provider request, it specifies the timeout duration for a request and - if exceeded - returns an error.
+Request timeouts can be set on a Universal Endpoint or directly on a request to any provider.
 
 ### Definitions
 
@@ -31,29 +28,12 @@ A timeout is set in milliseconds. Additionaly, the timeout is based on when the
 
 #### Universal Endpoint
 
-For a Universal endpoint, configure the timeout value by setting a `requestTimeout` property at the
-
-by using one or more of the following properties, which are listed in order of priority:
-
-| Priority | Property                                                                                                               |
-| -------- | ---------------------------------------------------------------------------------------------------------------------- |
-| 1        | `requestTimeout` (added as a universal attribute)                                                                      |
-| 2        | `cf-aig-request-timeout` (header included at the [provider level](/ai-gateway/providers/universal/#payload-reference)) |
-| 3        | `cf-aig-request-timeout` (header included at the request level)                                                        |
-
-Your gateway follows this hierarchy to determine the timeout duration before implementing a fallback.
-
-### Request timeout example
+If set on a [Universal Endpoint](/ai-gateway/providers/universal/), a request timeout specifies the timeout duration for requests and triggers a fallback.
 
-These request timeout values can interact to customize the behavior of your universal gateway.
+For a Universal Endpoint, configure the timeout value by setting a `requestTimeout` property either as a universal attribute or within the provider-specific `config` object.
 
-In this example, the request will try to answer `What is Cloudflare?` within 1000 milliseconds using the normal `@cf/meta/llama-3.1-8b-instruct` model. The `requestTimeout` property takes precedence over the `cf-aig-request-timeout` for `@cf/meta/llama-3.1-8b-instruct`.
-
-If that fails, then the gateway will timeout and move to the fallback `@cf/meta/llama-3.1-8b-instruct-fast` model. This model has 3000 milliseconds - determined by the request-level `cf-aig-request-timeout` value - to complete the request and provide an answer.
-
-```bash title="Request" collapse={36-50} {2,11,13-15}
+```bash title="Provider-level config" {12-14} collapse={15-48}
 curl 'https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}' \
-	--header 'cf-aig-request-timeout: 3000' \
 	--header 'Content-Type: application/json' \
 	--data '[
     {
@@ -98,7 +78,76 @@ curl 'https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}' \
                     "content": "What is Cloudflare?"
                 }
             ]
-        }
+        },
+        "config": {
+            "requestTimeout": 3000
+        }
     }
 ]'
 ```
+
+To further customize request handling, you can include unique `RequestTimeout` values for each provider and a default across your Universal Endpoint.
+
+In this case, the most specific value takes precedence. A timeout value on a specific provider would take precedence over the one on the endpoint itself.
+
+#### Direct provider
+
+If set on a [provider](/ai-gateway/providers/) request, request timeout specifies the timeout duration for a request and - if exceeded - returns an error.
+
+For a provider-specific endpoint, configure the timeout value by adding a `cf-aig-request-timeout` header.
+
+```bash title="Provider-specific endpoint example" {4}
+curl https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}/workers-ai/@cf/meta/llama-3.1-8b-instruct \
+ --header 'Authorization: Bearer {cf_api_token}' \
+ --header 'Content-Type: application/json' \
+ --header 'cf-aig-request-timeout: 5000' \
+ --data '{"prompt": "What is Cloudflare?"}'
+```
+
+---
+
+## Request retries
+
+AI Gateway also supports automatic retries for failed requests, with a maximum of five retry attempts.
+
+This feature improves your application's resiliency, ensuring you can recover from temporary issues without manual intervention.
+
+Request retries can be set on a Universal Endpoint or directly on a request to any provider.
+
+### Definitions
+
+With request retries, you can adjust a combination of three properties:
+
+- Number of attempts (max of 5 tries)
+- How long before retrying (in milliseconds, max of 5 seconds)
+- Backoff method (constant, linear, or exponential)
+
+On the final retry attempt, your gateway will wait until the request completes, regardless of how long it takes.
+
+### Configuration
+
+#### Universal endpoint
+
+If set on a [Universal Endpoint](/ai-gateway/providers/universal/), a request retry will automatically retry failed requests up to five times before triggering any configured fallbacks.
+
+For a Universal Endpoint, configure the timeout value by setting the following properties in the overall or provider-specific `config`:
+
+```json
+config:{
+	maxAttempts?: number;
+	retryDelay?: number;
+	backoff?: "constant" | "linear" | "exponential";
+}
+```
+
+As with the [request timeout](/ai-gateway/configuration/request-handling/#universal-endpoint), the values can interact with each other to provide more customized logic.
+
+#### Direct provider
+
+If set on a [provider](/ai-gateway/providers/) request, request timeout specifies the timeout duration for a request and - if exceeded - returns an error.
+
+For a provider-specific endpoint, configure the timeout value by adding different header values:
+
+- `cf-aig-max-attempts` (number)
+- `cf-aig-retry-delay` (number)
+- `cf-aig-backoff` ("constant" | "linear" | "exponential")
diff --git a/src/env.d.ts b/src/env.d.ts
new file mode 100644
index 000000000000000..9bc5cb41c24efc4
--- /dev/null
+++ b/src/env.d.ts
@@ -0,0 +1 @@
+/// <reference path="../.astro/types.d.ts" />
\ No newline at end of file

From c5af8b4fc9615612e64208f6a52ab27d431eeef3 Mon Sep 17 00:00:00 2001
From: Kathy <153706637+kathayl@users.noreply.github.com>
Date: Mon, 3 Feb 2025 11:00:07 -0800
Subject: [PATCH 03/13] Update fallbacks.mdx

"in the following example" paragraph was duplicated, so deleting
---
 src/content/docs/ai-gateway/configuration/fallbacks.mdx | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/content/docs/ai-gateway/configuration/fallbacks.mdx b/src/content/docs/ai-gateway/configuration/fallbacks.mdx
index d79609b3155b092..9f3e69c604f10c4 100644
--- a/src/content/docs/ai-gateway/configuration/fallbacks.mdx
+++ b/src/content/docs/ai-gateway/configuration/fallbacks.mdx
@@ -17,8 +17,6 @@ By default, Cloudflare triggers your fallback if a model request returns an erro
 
 ### Example
 
-In the following example, a request first goes to the [Workers AI](/workers-ai/) Inference API. If the request fails, it falls back to OpenAI.
-
 In the following example, a request first goes to the [Workers AI](/workers-ai/) Inference API. If the request fails, it falls back to OpenAI. The response header `cf-aig-step` indicates which provider successfully processed the request.
 
 1. Sends a request to Workers AI Inference API.

From fed42cf679dbfe01b7f3fbee04d7dc65dbda9ae8 Mon Sep 17 00:00:00 2001
From: Kathy <153706637+kathayl@users.noreply.github.com>
Date: Mon, 3 Feb 2025 11:13:04 -0800
Subject: [PATCH 04/13] Update request-handling.mdx

slight updates re:
-what to use when using universal
-wording
---
 .../configuration/request-handling.mdx          | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/src/content/docs/ai-gateway/configuration/request-handling.mdx b/src/content/docs/ai-gateway/configuration/request-handling.mdx
index 27049390e2be118..6ec9d47c8c10757 100644
--- a/src/content/docs/ai-gateway/configuration/request-handling.mdx
+++ b/src/content/docs/ai-gateway/configuration/request-handling.mdx
@@ -30,7 +30,7 @@ A timeout is set in milliseconds. Additionaly, the timeout is based on when the
 
 If set on a [Universal Endpoint](/ai-gateway/providers/universal/), a request timeout specifies the timeout duration for requests and triggers a fallback.
 
-For a Universal Endpoint, configure the timeout value by setting a `requestTimeout` property either as a universal attribute or within the provider-specific `config` object.
+For a Universal Endpoint, configure the timeout value by setting a `requestTimeout` property within the provider-specific `config` object. Each provider can have a different `requestTimeout` value for granular customization. 
 
 ```bash title="Provider-level config" {12-14} collapse={15-48}
 curl 'https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}' \
@@ -41,8 +41,7 @@ curl 'https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}' \
         "endpoint": "@cf/meta/llama-3.1-8b-instruct",
         "headers": {
             "Authorization": "Bearer {cloudflare_token}",
-            "Content-Type": "application/json",
-            "cf-aig-request-timeout": "2000"
+            "Content-Type": "application/json"
         },
         "config": {
             "requestTimeout": 1000
@@ -55,7 +54,7 @@ curl 'https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}' \
                 },
                 {
                     "role": "user",
-                    "content": "What is Cloduflare?"
+                    "content": "What is Cloudflare?"
                 }
             ]
         }
@@ -86,10 +85,6 @@ curl 'https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}' \
 ]'
 ```
 
-To further customize request handling, you can include unique `RequestTimeout` values for each provider and a default across your Universal Endpoint.
-
-In this case, the most specific value takes precedence. A timeout value on a specific provider would take precedence over the one on the endpoint itself.
-
 #### Direct provider
 
 If set on a [provider](/ai-gateway/providers/) request, request timeout specifies the timeout duration for a request and - if exceeded - returns an error.
@@ -130,7 +125,7 @@ On the final retry attempt, your gateway will wait until the request completes,
 
 If set on a [Universal Endpoint](/ai-gateway/providers/universal/), a request retry will automatically retry failed requests up to five times before triggering any configured fallbacks.
 
-For a Universal Endpoint, configure the timeout value by setting the following properties in the overall or provider-specific `config`:
+For a Universal Endpoint, configure the retry settings with the following properties in the provider-specific `config`:
 
 ```json
 config:{
@@ -140,11 +135,11 @@ config:{
 }
 ```
 
-As with the [request timeout](/ai-gateway/configuration/request-handling/#universal-endpoint), the values can interact with each other to provide more customized logic.
+As with the [request timeout](/ai-gateway/configuration/request-handling/#universal-endpoint), each provider can have a different retry settings for granular customization. 
 
 #### Direct provider
 
-If set on a [provider](/ai-gateway/providers/) request, request timeout specifies the timeout duration for a request and - if exceeded - returns an error.
+If set on a [provider](/ai-gateway/providers/) request, a request retry will automatically retry failed requests up to five times. On the final retry attempt, your gateway will wait until the request completes, regardless of how long it takes.
 
 For a provider-specific endpoint, configure the timeout value by adding different header values:
 

From 4993ffb5d89fef594e8d10dc25b2d97a66091fd9 Mon Sep 17 00:00:00 2001
From: Kathy <153706637+kathayl@users.noreply.github.com>
Date: Mon, 3 Feb 2025 11:13:48 -0800
Subject: [PATCH 05/13] Update ai-gateway.yaml

update date
---
 src/content/changelogs/ai-gateway.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/content/changelogs/ai-gateway.yaml b/src/content/changelogs/ai-gateway.yaml
index 93a519c79692749..14f518a7dd45a1b 100644
--- a/src/content/changelogs/ai-gateway.yaml
+++ b/src/content/changelogs/ai-gateway.yaml
@@ -5,7 +5,7 @@ productLink: "/ai-gateway/"
 productArea: Developer platform
 productAreaLink: /workers/platform/changelog/platform/
 entries:
-  - publish_date: "2025-01-23"
+  - publish_date: "2025-02-04"
     title: Added request handling
     description: |-
       * Added [request handling options](/ai-gateway/request-handling/) to help manage AI provider interactions effectively, ensuring your applications remain responsive and reliable.

From 5c0eae09a9cfa1dccb2c53a12d830f7c4f9fcbb8 Mon Sep 17 00:00:00 2001
From: Kathy <153706637+kathayl@users.noreply.github.com>
Date: Mon, 3 Feb 2025 15:33:54 -0800
Subject: [PATCH 06/13] Update request-handling.mdx

fix word
---
 src/content/docs/ai-gateway/configuration/request-handling.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/content/docs/ai-gateway/configuration/request-handling.mdx b/src/content/docs/ai-gateway/configuration/request-handling.mdx
index 6ec9d47c8c10757..185f4f3b70f6d79 100644
--- a/src/content/docs/ai-gateway/configuration/request-handling.mdx
+++ b/src/content/docs/ai-gateway/configuration/request-handling.mdx
@@ -141,7 +141,7 @@ As with the [request timeout](/ai-gateway/configuration/request-handling/#univer
 
 If set on a [provider](/ai-gateway/providers/) request, a request retry will automatically retry failed requests up to five times. On the final retry attempt, your gateway will wait until the request completes, regardless of how long it takes.
 
-For a provider-specific endpoint, configure the timeout value by adding different header values:
+For a provider-specific endpoint, configure the retry settings by adding different header values:
 
 - `cf-aig-max-attempts` (number)
 - `cf-aig-retry-delay` (number)

From ffe91d1324e940c12e18e8642c05acaad2e82732 Mon Sep 17 00:00:00 2001
From: kodster28 <kody@cloudflare.com>
Date: Wed, 5 Feb 2025 06:06:14 -0600
Subject: [PATCH 07/13] remove random file

---
 src/env.d.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/env.d.ts b/src/env.d.ts
index 9bc5cb41c24efc4..e69de29bb2d1d64 100644
--- a/src/env.d.ts
+++ b/src/env.d.ts
@@ -1 +0,0 @@
-/// <reference path="../.astro/types.d.ts" />
\ No newline at end of file

From f56cf44532b81bb081e737416947cad49d153e16 Mon Sep 17 00:00:00 2001
From: kodster28 <kody@cloudflare.com>
Date: Wed, 5 Feb 2025 06:07:12 -0600
Subject: [PATCH 08/13] remove file

---
 src/env.d.ts | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 src/env.d.ts

diff --git a/src/env.d.ts b/src/env.d.ts
deleted file mode 100644
index e69de29bb2d1d64..000000000000000

From 01021cf0602610c12e839d61fd542eddd50166ce Mon Sep 17 00:00:00 2001
From: kodster28 <kody@cloudflare.com>
Date: Wed, 5 Feb 2025 06:10:57 -0600
Subject: [PATCH 09/13] Add headers

---
 src/content/changelogs/ai-gateway.yaml |  2 +-
 src/content/glossary/ai-gateway.yaml   | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/content/changelogs/ai-gateway.yaml b/src/content/changelogs/ai-gateway.yaml
index 14f518a7dd45a1b..ebe7d377f18ce71 100644
--- a/src/content/changelogs/ai-gateway.yaml
+++ b/src/content/changelogs/ai-gateway.yaml
@@ -5,7 +5,7 @@ productLink: "/ai-gateway/"
 productArea: Developer platform
 productAreaLink: /workers/platform/changelog/platform/
 entries:
-  - publish_date: "2025-02-04"
+  - publish_date: "2025-02-05"
     title: Added request handling
     description: |-
       * Added [request handling options](/ai-gateway/request-handling/) to help manage AI provider interactions effectively, ensuring your applications remain responsive and reliable.
diff --git a/src/content/glossary/ai-gateway.yaml b/src/content/glossary/ai-gateway.yaml
index 36d378609637fd8..1bbc85598ca2cff 100644
--- a/src/content/glossary/ai-gateway.yaml
+++ b/src/content/glossary/ai-gateway.yaml
@@ -45,6 +45,18 @@ entries:
     general_definition: |-
       Header to trigger a fallback provider based on a [predetermined response time](/ai-gateway/configuration/fallbacks/#request-timeouts) (measured in milliseconds).
 
+  - term: cf-aig-max-attempts
+    general_definition: |-
+      Header to customize the number of max attempts for [request retries](/ai-gateway/configuration/request-handling/#request-retries) of a request.
+
+  - term: cf-aig-retry-delay
+    general_definition: |-
+      Header to customize the retry delay for [request retries](/ai-gateway/configuration/request-handling/#request-retries) of a request.
+
+  - term: cf-aig-backoff
+    general_definition: |-
+      Header to customize the backoff type for [request retries](/ai-gateway/configuration/request-handling/#request-retries) of a request.
+
   # Deprecated headers
   - term: cf-cache-ttl
     general_definition: |-

From 9cb76f690762b6e82a3ac6691fe082c156dd6f03 Mon Sep 17 00:00:00 2001
From: kodster28 <kody@cloudflare.com>
Date: Wed, 5 Feb 2025 06:15:23 -0600
Subject: [PATCH 10/13] Added example

---
 .../configuration/request-handling.mdx        | 61 ++++++++++++++++++-
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/src/content/docs/ai-gateway/configuration/request-handling.mdx b/src/content/docs/ai-gateway/configuration/request-handling.mdx
index 185f4f3b70f6d79..d96e5109cb39249 100644
--- a/src/content/docs/ai-gateway/configuration/request-handling.mdx
+++ b/src/content/docs/ai-gateway/configuration/request-handling.mdx
@@ -30,7 +30,7 @@ A timeout is set in milliseconds. Additionaly, the timeout is based on when the
 
 If set on a [Universal Endpoint](/ai-gateway/providers/universal/), a request timeout specifies the timeout duration for requests and triggers a fallback.
 
-For a Universal Endpoint, configure the timeout value by setting a `requestTimeout` property within the provider-specific `config` object. Each provider can have a different `requestTimeout` value for granular customization. 
+For a Universal Endpoint, configure the timeout value by setting a `requestTimeout` property within the provider-specific `config` object. Each provider can have a different `requestTimeout` value for granular customization.
 
 ```bash title="Provider-level config" {12-14} collapse={15-48}
 curl 'https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}' \
@@ -135,7 +135,64 @@ config:{
 }
 ```
 
-As with the [request timeout](/ai-gateway/configuration/request-handling/#universal-endpoint), each provider can have a different retry settings for granular customization. 
+As with the [request timeout](/ai-gateway/configuration/request-handling/#universal-endpoint), each provider can have different retry settings for granular customization.
+
+```bash title="Provider-level config" {11-15} collapse={16-55}
+curl 'https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}' \
+	--header 'Content-Type: application/json' \
+	--data '[
+    {
+        "provider": "workers-ai",
+        "endpoint": "@cf/meta/llama-3.1-8b-instruct",
+        "headers": {
+            "Authorization": "Bearer {cloudflare_token}",
+            "Content-Type": "application/json"
+        },
+        "config": {
+            "maxAttempts": 2,
+            "retryDelay": 1000,
+            "backoff": "constant"
+        },
+        "query": {
+            "messages": [
+                {
+                    "role": "system",
+                    "content": "You are a friendly assistant"
+                },
+                {
+                    "role": "user",
+                    "content": "What is Cloudflare?"
+                }
+            ]
+        }
+    },
+    {
+        "provider": "workers-ai",
+        "endpoint": "@cf/meta/llama-3.1-8b-instruct-fast",
+        "headers": {
+            "Authorization": "Bearer {cloudflare_token}",
+            "Content-Type": "application/json"
+        },
+        "query": {
+            "messages": [
+                {
+                    "role": "system",
+                    "content": "You are a friendly assistant"
+                },
+                {
+                    "role": "user",
+                    "content": "What is Cloudflare?"
+                }
+            ]
+        },
+        "config": {
+            "maxAttempts": 4,
+            "retryDelay": 1000,
+            "backoff": "exponential"
+        }
+    }
+]'
+```
 
 #### Direct provider
 

From 3f3eb2590db6bc3eafb2fd845c1e603bd86715a0 Mon Sep 17 00:00:00 2001
From: Kody Jackson <kody@cloudflare.com>
Date: Wed, 5 Feb 2025 09:55:04 -0600
Subject: [PATCH 11/13] Apply suggestions from code review

Co-authored-by: marciocloudflare <83226960+marciocloudflare@users.noreply.github.com>
---
 .../docs/ai-gateway/configuration/request-handling.mdx        | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/content/docs/ai-gateway/configuration/request-handling.mdx b/src/content/docs/ai-gateway/configuration/request-handling.mdx
index d96e5109cb39249..6c0d99d6c65eb3e 100644
--- a/src/content/docs/ai-gateway/configuration/request-handling.mdx
+++ b/src/content/docs/ai-gateway/configuration/request-handling.mdx
@@ -113,8 +113,8 @@ Request timeouts can be set on a Universal Endpoint or directly on a request to
 
 With request retries, you can adjust a combination of three properties:
 
-- Number of attempts (max of 5 tries)
-- How long before retrying (in milliseconds, max of 5 seconds)
+- Number of attempts (maximum of 5 tries)
+- How long before retrying (in milliseconds, maximum of 5 seconds)
 - Backoff method (constant, linear, or exponential)
 
 On the final retry attempt, your gateway will wait until the request completes, regardless of how long it takes.

From acdf89333a3a535d68c503a5a1dc530ed6cd51de Mon Sep 17 00:00:00 2001
From: Kathy <153706637+kathayl@users.noreply.github.com>
Date: Wed, 5 Feb 2025 10:17:55 -0800
Subject: [PATCH 12/13] Update request-handling.mdx

Fix typo
---
 src/content/docs/ai-gateway/configuration/request-handling.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/content/docs/ai-gateway/configuration/request-handling.mdx b/src/content/docs/ai-gateway/configuration/request-handling.mdx
index 6c0d99d6c65eb3e..df02bcff256efbd 100644
--- a/src/content/docs/ai-gateway/configuration/request-handling.mdx
+++ b/src/content/docs/ai-gateway/configuration/request-handling.mdx
@@ -22,7 +22,7 @@ Request timeouts can be set on a Universal Endpoint or directly on a request to
 
 ### Definitions
 
-A timeout is set in milliseconds. Additionaly, the timeout is based on when the first part of the response comes back. As long as the first part of the response returns within the specified timeframe - such as when streaming a response - your gateway will wait for the response.
+A timeout is set in milliseconds. Additionally, the timeout is based on when the first part of the response comes back. As long as the first part of the response returns within the specified timeframe - such as when streaming a response - your gateway will wait for the response.
 
 ### Configuration
 

From 7960f42170e07fc2fba3a780eb4c85065f8877d1 Mon Sep 17 00:00:00 2001
From: kodster28 <kody@cloudflare.com>
Date: Thu, 6 Feb 2025 06:46:39 -0600
Subject: [PATCH 13/13] fix highlight

---
 src/content/changelogs/ai-gateway.yaml                         | 3 ++-
 src/content/docs/ai-gateway/configuration/request-handling.mdx | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/content/changelogs/ai-gateway.yaml b/src/content/changelogs/ai-gateway.yaml
index 0838d418913a1c2..ceab1a00fd67284 100644
--- a/src/content/changelogs/ai-gateway.yaml
+++ b/src/content/changelogs/ai-gateway.yaml
@@ -5,10 +5,11 @@ productLink: "/ai-gateway/"
 productArea: Developer platform
 productAreaLink: /workers/platform/changelog/platform/
 entries:
-   - publish_date: "2025-02-06"
+  - publish_date: "2025-02-06"
     title: Added request handling
     description: |-
       * Added [request handling options](/ai-gateway/request-handling/) to help manage AI provider interactions effectively, ensuring your applications remain responsive and reliable.
+
   - publish_date: "2025-02-05"
     title: New AI Gateway providers
     description: |-
diff --git a/src/content/docs/ai-gateway/configuration/request-handling.mdx b/src/content/docs/ai-gateway/configuration/request-handling.mdx
index df02bcff256efbd..6821aa5480f6544 100644
--- a/src/content/docs/ai-gateway/configuration/request-handling.mdx
+++ b/src/content/docs/ai-gateway/configuration/request-handling.mdx
@@ -32,7 +32,7 @@ If set on a [Universal Endpoint](/ai-gateway/providers/universal/), a request ti
 
 For a Universal Endpoint, configure the timeout value by setting a `requestTimeout` property within the provider-specific `config` object. Each provider can have a different `requestTimeout` value for granular customization.
 
-```bash title="Provider-level config" {12-14} collapse={15-48}
+```bash title="Provider-level config" {11-13} collapse={15-48}
 curl 'https://gateway.ai.cloudflare.com/v1/{account_id}/{gateway_id}' \
 	--header 'Content-Type: application/json' \
 	--data '[