cecli-dev · dwash96 · Jan 18, 2026 · Jan 6, 2026 · Jan 6, 2026 · Jan 6, 2026
diff --git a/cecli/args.py b/cecli/args.py
@@ -241,6 +241,12 @@ def get_parser(default_config_files, git_root):
             " If unspecified, defaults to the model's max_chat_history_tokens."
         ),
     )
+    group.add_argument(
+        "--retries",
+        metavar="RETRIES_JSON",
+        help="Specify LLM retry configuration as a JSON string",
+        default=None,
+    )
 
     #######
     group = parser.add_argument_group("Customization Settings")
@@ -1115,4 +1121,4 @@ def main():
 
 if __name__ == "__main__":
     status = main()
-    sys.exit(status)
+    sys.exit(status)
diff --git a/cecli/main.py b/cecli/main.py
@@ -556,6 +556,8 @@ async def main_async(argv=None, input=None, output=None, force_git_root=None, re
         args.mcp_servers = convert_yaml_to_json_string(args.mcp_servers)
     if hasattr(args, "custom") and args.custom is not None:
         args.custom = convert_yaml_to_json_string(args.custom)
+    if hasattr(args, "retries") and args.retries is not None:
+        args.retries = convert_yaml_to_json_string(args.retries)
     if args.debug:
         global log_file
         os.makedirs(".cecli/logs/", exist_ok=True)
@@ -861,6 +863,7 @@ def apply_model_overrides(model_name):
         verbose=args.verbose,
         io=io,
         override_kwargs=main_model_overrides,
+        retries=args.retries,
     )
     if args.copy_paste and main_model.copy_paste_transport == "api":
         main_model.enable_copy_paste_mode()
@@ -1285,4 +1288,4 @@ async def graceful_exit(coder=None, exit_code=0):
 
 if __name__ == "__main__":
     status = main()
-    sys.exit(status)
+    sys.exit(status)
diff --git a/cecli/models.py b/cecli/models.py
@@ -309,6 +309,7 @@
         verbose=False,
         io=None,
         override_kwargs=None,
+        retries=None,
     ):
         provided_model = model or ""
         if isinstance(provided_model, Model):
@@ -327,6 +328,7 @@
             model = provided_model
         model = MODEL_ALIASES.get(model, model)
         self.name = model
+        self.retries = retries
         self.max_chat_history_tokens = 1024
         self.weak_model = None
         self.editor_model = None
@@ -950,15 +952,64 @@
                     "Editor-Version": f"cecli/{__version__}",
                     "Copilot-Integration-Id": "vscode-chat",
                 }
-        try:
-            res = await litellm.acompletion(**kwargs)
-        except Exception as err:
-            print(f"LiteLLM API Error: {str(err)}")
-            res = self.model_error_response()
-            if self.verbose:
-                print(f"LiteLLM API Error: {str(err)}")
-                raise
-        return hash_object, res
+        if self.retries:
+            try:
+                retries_config = json.loads(self.retries)
+                if "timeout" in retries_config:
+                    self.request_timeout = retries_config["timeout"]
+                if "backoff-factor" in retries_config:
+                    self.retry_backoff_factor = float(retries_config["backoff-factor"])
+                if "retry-on-unavailable" in retries_config:
+                    self.retry_on_unavailable = bool(retries_config["retry-on-unavailable"])
+                if "retry-timeout" in retries_config:
+                    self.retry_timeout = float(retries_config["retry-timeout"])
+            except (json.JSONDecodeError, TypeError, ValueError):
+                pass
+
+        kwargs["timeout"] = self.request_timeout
+
+        litellm_ex = LiteLLMExceptions()
+        retry_delay = 0.125
+
+        while True:
+            try:
+                if self.verbose:
+                    dump(kwargs)
+                res = await litellm.acompletion(**kwargs)
+                return hash_object, res
+            except litellm.ContextWindowExceededError as err:
+                raise err
+            except litellm_ex.exceptions_tuple() as err:
+                ex_info = litellm_ex.get_ex_info(err)
+                should_retry = ex_info.retry
+                if ex_info.name == "ServiceUnavailableError":
+                    should_retry = should_retry or self.retry_on_unavailable
+
+                if should_retry:
+                    retry_delay *= self.retry_backoff_factor
+                    if retry_delay > self.retry_timeout:
+                        should_retry = False
+
+                # Check for non-retryable RateLimitError within ServiceUnavailableError
+                if (
+                    isinstance(err, litellm.ServiceUnavailableError)
+                    and "RateLimitError" in str(err)
+                    and 'status_code: 429, message: "Resource has been exhausted' in str(err)
+                ):
+                    should_retry = False
+
+                if not should_retry:
+                    print(f"LiteLLM API Error: {str(err)}")
+                    if ex_info.description:
+                        print(ex_info.description)
+                    if stream:
+                        return hash_object, self.model_error_response_stream()
+                    else:
+                        return hash_object, self.model_error_response()
+
+                print(f"Retrying in {retry_delay:.1f} seconds...")
+                await asyncio.sleep(retry_delay)
+                continue
 
     async def simple_send_with_retries(self, messages, max_tokens=None):
         from cecli.exceptions import LiteLLMExceptions
@@ -997,21 +1048,22 @@
             except AttributeError:
                 return None
 
-    async def model_error_response(self):
-        for i in range(1):
-            await asyncio.sleep(0.1)
-            yield litellm.ModelResponse(
-                choices=[
-                    litellm.Choices(
-                        finish_reason="stop",
-                        index=0,
-                        message=litellm.Message(
-                            content="Model API Response Error. Please retry the previous request"
-                        ),
-                    )
-                ],
-                model=self.name,
-            )
+    def model_error_response(self):
+        return litellm.ModelResponse(
+            choices=[
+                litellm.Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=litellm.Message(
+                        content="Model API Response Error. Please retry the previous request"
+                    ),
+                )
+            ],
+            model=self.name,
+        )
+
+    async def model_error_response_stream(self):
+        yield self.model_error_response()
 
 
 def register_models(model_settings_fnames):
@@ -1252,4 +1304,4 @@
 
 
 if __name__ == "__main__":
-    main()
+    main()
diff --git a/cecli/website/docs/config.md b/cecli/website/docs/config.md
@@ -40,5 +40,35 @@ Using an `.env` file:
 CECLI_DARK_MODE=true
 ```
 
-{% include keys.md %}
 
+## Retries
+
+cecli can be configured to retry failed API calls.
+This is useful for handling intermittent network issues or other transient errors.
+The `retries` option is a JSON object that can be configured with the following keys:
+
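+- `timeout`: The request timeout passed to the LLM API call.
+- `retry-timeout`: The maximum delay in seconds between retries. The delay grows after every failed attempt; once it would exceed this value, the request is no longer retried and the error is reported.
+- `backoff-factor`: The multiplier applied to the retry delay after each failed attempt.
+- `retry-on-unavailable`: Whether to retry on 503 Service Unavailable errors.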
+
+Example usage in `.aider.conf.yml`:
+
+```yaml
+retries:
+  retry-timeout: 30
+  backoff-factor: 1.50
+  retry-on-unavailable: true
+```
+
+This can also be set with the `--retries` command line switch, passing a JSON string:
+
+```
+$ cecli --retries '{"retry-timeout": 30, "backoff-factor": 1.50, "retry-on-unavailable": true}'
+```
+
+Or by setting the `CECLI_RETRIES` environment variable:
+
+```
+export CECLI_RETRIES='{"retry-timeout": 30, "backoff-factor": 1.50, "retry-on-unavailable": true}'
+```
+{% include keys.md %}