From 56a1f4f94402631d87841bdb555f7560731956b0 Mon Sep 17 00:00:00 2001
From: Your Name <szmania@yahoo.com>
Date: Mon, 5 Jan 2026 18:33:06 -0800
Subject: [PATCH 1/4] feat: Implement nested LLM retry configuration via
 `--retries` argument

Co-authored-by: aider-ce (openai/gemini_szmania/gemini-3-flash-preview)
---
 cecli/args.py   |  8 +++++++-
 cecli/main.py   |  5 ++++-
 cecli/models.py | 15 ++++++++++++++-
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/cecli/args.py b/cecli/args.py
index 1ba9de0cd7f..0395d987a33 100644
--- a/cecli/args.py
+++ b/cecli/args.py
@@ -241,6 +241,12 @@ def get_parser(default_config_files, git_root):
             " If unspecified, defaults to the model's max_chat_history_tokens."
         ),
     )
+    group.add_argument(
+        "--retries",
+        metavar="RETRIES_JSON",
+        help="Specify LLM retry configuration as a JSON string",
+        default=None,
+    )
 
     #######
     group = parser.add_argument_group("Customization Settings")
@@ -1115,4 +1121,4 @@ def main():
 
 if __name__ == "__main__":
     status = main()
-    sys.exit(status)
+    sys.exit(status)
\ No newline at end of file
diff --git a/cecli/main.py b/cecli/main.py
index 624af74cb4a..b49e5583c48 100644
--- a/cecli/main.py
+++ b/cecli/main.py
@@ -556,6 +556,8 @@ async def main_async(argv=None, input=None, output=None, force_git_root=None, re
         args.mcp_servers = convert_yaml_to_json_string(args.mcp_servers)
     if hasattr(args, "custom") and args.custom is not None:
         args.custom = convert_yaml_to_json_string(args.custom)
+    if hasattr(args, "retries") and args.retries is not None:
+        args.retries = convert_yaml_to_json_string(args.retries)
     if args.debug:
         global log_file
         os.makedirs(".cecli/logs/", exist_ok=True)
@@ -861,6 +863,7 @@ def apply_model_overrides(model_name):
         verbose=args.verbose,
         io=io,
         override_kwargs=main_model_overrides,
+        retries=args.retries,
     )
     if args.copy_paste and main_model.copy_paste_transport == "api":
         main_model.enable_copy_paste_mode()
@@ -1285,4 +1288,4 @@ async def graceful_exit(coder=None, exit_code=0):
 
 if __name__ == "__main__":
     status = main()
-    sys.exit(status)
+    sys.exit(status)
\ No newline at end of file
diff --git a/cecli/models.py b/cecli/models.py
index 42410231d8b..5e8f892cd52 100644
--- a/cecli/models.py
+++ b/cecli/models.py
@@ -309,6 +309,7 @@ def __init__(
         verbose=False,
         io=None,
         override_kwargs=None,
+        retries=None,
     ):
         provided_model = model or ""
         if isinstance(provided_model, Model):
@@ -327,6 +328,7 @@ def __init__(
             model = provided_model
         model = MODEL_ALIASES.get(model, model)
         self.name = model
+        self.retries = retries
         self.max_chat_history_tokens = 1024
         self.weak_model = None
         self.editor_model = None
@@ -950,6 +952,17 @@ async def send_completion(
                     "Editor-Version": f"cecli/{__version__}",
                     "Copilot-Integration-Id": "vscode-chat",
                 }
+        if self.retries:
+            try:
+                retries_config = json.loads(self.retries)
+                if "timeout" in retries_config:
+                    kwargs["timeout"] = retries_config["timeout"]
+                if "backoff-factor" in retries_config:
+                    kwargs["num_retries"] = 5
+                if "retry-on-unavailable" in retries_config and retries_config["retry-on-unavailable"]:
+                    kwargs["num_retries"] = kwargs.get("num_retries", 5)
+            except (json.JSONDecodeError, TypeError):
+                pass
         try:
             res = await litellm.acompletion(**kwargs)
         except Exception as err:
@@ -1252,4 +1265,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file

From eaf9e8c9a7aadc2c4b176aa693485886fdc822d1 Mon Sep 17 00:00:00 2001
From: Your Name <szmania@yahoo.com>
Date: Mon, 5 Jan 2026 23:13:05 -0800
Subject: [PATCH 2/4] docs: Document the nested structure of the `retries`
 configuration

Co-authored-by: aider-ce (openai/openai_gemini_cli/gemini-2.5-pro)
---
 cecli/website/docs/config.md | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/cecli/website/docs/config.md b/cecli/website/docs/config.md
index 5a12fcb0eb9..372c759ad10 100644
--- a/cecli/website/docs/config.md
+++ b/cecli/website/docs/config.md
@@ -40,5 +40,37 @@ Using an `.env` file:
 CECLI_DARK_MODE=true
 ```
 
-{% include keys.md %}
 
+## Retries
+
+cecli can be configured to retry failed LLM API calls.
+This is useful for handling intermittent network issues or other transient errors.
+The `retries` option is a JSON object that can be configured with the following keys:
+
+- `max-retries`: The maximum number of retries to attempt.
+- `retry-timeout`: The timeout in seconds for each retry.
+- `retry-backoff-factor`: The backoff factor to use between retries.
+- `retry-on-unavailable`: Whether to retry on 503 Service Unavailable errors.
+
+Example usage in `.aider.conf.yml`:
+
+```yaml
+retries:
+  max-retries: 3
+  retry-timeout: 30
+  retry-backoff-factor: 1.5
+  retry-on-unavailable: true
+```
+
+This can also be set with the `--retries` command line switch, passing a JSON string:
+
+```
+$ aider --retries '{"max-retries": 3, "retry-timeout": 30, "retry-backoff-factor": 1.5, "retry-on-unavailable": true}'
+```
+
+Or by setting the `CECLI_RETRIES` environment variable:
+
+```
+export CECLI_RETRIES='{"max-retries": 3, "retry-timeout": 30, "retry-backoff-factor": 1.5, "retry-on-unavailable": true}'
+```
+{% include keys.md %}

From 1d0b158d6b1b6b4a6bde2c0955a77f2f4f84d623 Mon Sep 17 00:00:00 2001
From: Your Name <szmania@yahoo.com>
Date: Mon, 5 Jan 2026 23:28:33 -0800
Subject: [PATCH 3/4] docs: Update retries configuration documentation

Co-authored-by: aider-ce (openai/openai_gemini_cli/gemini-2.5-pro)
---
 cecli/website/docs/config.md | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/cecli/website/docs/config.md b/cecli/website/docs/config.md
index 372c759ad10..f99b66d6442 100644
--- a/cecli/website/docs/config.md
+++ b/cecli/website/docs/config.md
@@ -47,7 +47,6 @@ Aider can be configured to retry failed API calls.
 This is useful for handling intermittent network issues or other transient errors.
 The `retries` option is a JSON object that can be configured with the following keys:
 
-- `max-retries`: The maximum number of retries to attempt.
 - `retry-timeout`: The timeout in seconds for each retry.
 - `retry-backoff-factor`: The backoff factor to use between retries.
 - `retry-on-unavailable`: Whether to retry on 503 Service Unavailable errors.
@@ -56,21 +55,20 @@ Example usage in `.aider.conf.yml`:
 
 ```yaml
 retries:
-  max-retries: 3
   retry-timeout: 30
-  retry-backoff-factor: 1.5
+  retry-backoff-factor: 1.50
   retry-on-unavailable: true
 ```
 
 This can also be set with the `--retries` command line switch, passing a JSON string:
 
 ```
-$ aider --retries '{"max-retries": 3, "retry-timeout": 30, "retry-backoff-factor": 1.5, "retry-on-unavailable": true}'
+$ cecli --retries '{"retry-timeout": 30, "retry-backoff-factor": 1.50, "retry-on-unavailable": true}'
 ```
 
 Or by setting the `CECLI_RETRIES` environment variable:
 
 ```
-export CECLI_RETRIES='{"max-retries": 3, "retry-timeout": 30, "retry-backoff-factor": 1.5, "retry-on-unavailable": true}'
+export CECLI_RETRIES='{"retry-timeout": 30, "retry-backoff-factor": 1.50, "retry-on-unavailable": true}'
 ```
 {% include keys.md %}

From a908e6b3d81ab999e301c7872a95ee7a0126a1c7 Mon Sep 17 00:00:00 2001
From: Your Name <szmania@yahoo.com>
Date: Tue, 6 Jan 2026 12:34:04 -0800
Subject: [PATCH 4/4] feat: Implement full LLM retry logic in send_completion

---
 cecli/models.py | 97 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 29 deletions(-)

diff --git a/cecli/models.py b/cecli/models.py
index 5e8f892cd52..ea8a6c0f8e4 100644
--- a/cecli/models.py
+++ b/cecli/models.py
@@ -956,22 +956,60 @@ async def send_completion(
             try:
                 retries_config = json.loads(self.retries)
                 if "timeout" in retries_config:
-                    kwargs["timeout"] = retries_config["timeout"]
+                    self.request_timeout = retries_config["timeout"]
                 if "backoff-factor" in retries_config:
-                    kwargs["num_retries"] = 5
-                if "retry-on-unavailable" in retries_config and retries_config["retry-on-unavailable"]:
-                    kwargs["num_retries"] = kwargs.get("num_retries", 5)
-            except (json.JSONDecodeError, TypeError):
+                    self.retry_backoff_factor = float(retries_config["backoff-factor"])
+                if "retry-on-unavailable" in retries_config:
+                    self.retry_on_unavailable = bool(retries_config["retry-on-unavailable"])
+                if "retry-timeout" in retries_config:
+                    self.retry_timeout = float(retries_config["retry-timeout"])
+            except (json.JSONDecodeError, TypeError, ValueError):
                 pass
-        try:
-            res = await litellm.acompletion(**kwargs)
-        except Exception as err:
-            print(f"LiteLLM API Error: {str(err)}")
-            res = self.model_error_response()
-            if self.verbose:
-                print(f"LiteLLM API Error: {str(err)}")
-                raise
-        return hash_object, res
+
+        kwargs["timeout"] = self.request_timeout
+
+        litellm_ex = LiteLLMExceptions()
+        retry_delay = 0.125
+
+        while True:
+            try:
+                if self.verbose:
+                    dump(kwargs)
+                res = await litellm.acompletion(**kwargs)
+                return hash_object, res
+            except litellm.ContextWindowExceededError as err:
+                raise err
+            except litellm_ex.exceptions_tuple() as err:
+                ex_info = litellm_ex.get_ex_info(err)
+                should_retry = ex_info.retry
+                if ex_info.name == "ServiceUnavailableError":
+                    should_retry = should_retry or self.retry_on_unavailable
+
+                if should_retry:
+                    retry_delay *= self.retry_backoff_factor
+                    if retry_delay > self.retry_timeout:
+                        should_retry = False
+
+                # Check for non-retryable RateLimitError within ServiceUnavailableError
+                if (
+                    isinstance(err, litellm.ServiceUnavailableError)
+                    and "RateLimitError" in str(err)
+                    and 'status_code: 429, message: "Resource has been exhausted' in str(err)
+                ):
+                    should_retry = False
+
+                if not should_retry:
+                    print(f"LiteLLM API Error: {str(err)}")
+                    if ex_info.description:
+                        print(ex_info.description)
+                    if stream:
+                        return hash_object, self.model_error_response_stream()
+                    else:
+                        return hash_object, self.model_error_response()
+
+                print(f"Retrying in {retry_delay:.1f} seconds...")
+                await asyncio.sleep(retry_delay)
+                continue
 
     async def simple_send_with_retries(self, messages, max_tokens=None):
         from cecli.exceptions import LiteLLMExceptions
@@ -1010,21 +1048,22 @@ async def simple_send_with_retries(self, messages, max_tokens=None):
             except AttributeError:
                 return None
 
-    async def model_error_response(self):
-        for i in range(1):
-            await asyncio.sleep(0.1)
-            yield litellm.ModelResponse(
-                choices=[
-                    litellm.Choices(
-                        finish_reason="stop",
-                        index=0,
-                        message=litellm.Message(
-                            content="Model API Response Error. Please retry the previous request"
-                        ),
-                    )
-                ],
-                model=self.name,
-            )
+    def model_error_response(self):
+        return litellm.ModelResponse(
+            choices=[
+                litellm.Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=litellm.Message(
+                        content="Model API Response Error. Please retry the previous request"
+                    ),
+                )
+            ],
+            model=self.name,
+        )
+
+    async def model_error_response_stream(self):
+        yield self.model_error_response()
 
 
 def register_models(model_settings_fnames):