Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion cecli/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,12 @@ def get_parser(default_config_files, git_root):
" If unspecified, defaults to the model's max_chat_history_tokens."
),
)
group.add_argument(
"--retries",
metavar="RETRIES_JSON",
help="Specify LLM retry configuration as a JSON string",
default=None,
)

#######
group = parser.add_argument_group("Customization Settings")
Expand Down Expand Up @@ -1115,4 +1121,4 @@ def main():

if __name__ == "__main__":
status = main()
sys.exit(status)
sys.exit(status)
5 changes: 4 additions & 1 deletion cecli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,8 @@ async def main_async(argv=None, input=None, output=None, force_git_root=None, re
args.mcp_servers = convert_yaml_to_json_string(args.mcp_servers)
if hasattr(args, "custom") and args.custom is not None:
args.custom = convert_yaml_to_json_string(args.custom)
if hasattr(args, "retries") and args.retries is not None:
args.retries = convert_yaml_to_json_string(args.retries)
if args.debug:
global log_file
os.makedirs(".cecli/logs/", exist_ok=True)
Expand Down Expand Up @@ -861,6 +863,7 @@ def apply_model_overrides(model_name):
verbose=args.verbose,
io=io,
override_kwargs=main_model_overrides,
retries=args.retries,
)
if args.copy_paste and main_model.copy_paste_transport == "api":
main_model.enable_copy_paste_mode()
Expand Down Expand Up @@ -1285,4 +1288,4 @@ async def graceful_exit(coder=None, exit_code=0):

if __name__ == "__main__":
status = main()
sys.exit(status)
sys.exit(status)
102 changes: 77 additions & 25 deletions cecli/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@
verbose=False,
io=None,
override_kwargs=None,
retries=None,
):
provided_model = model or ""
if isinstance(provided_model, Model):
Expand All @@ -327,6 +328,7 @@
model = provided_model
model = MODEL_ALIASES.get(model, model)
self.name = model
self.retries = retries
self.max_chat_history_tokens = 1024
self.weak_model = None
self.editor_model = None
Expand Down Expand Up @@ -950,15 +952,64 @@
"Editor-Version": f"cecli/{__version__}",
"Copilot-Integration-Id": "vscode-chat",
}
try:
res = await litellm.acompletion(**kwargs)
except Exception as err:
print(f"LiteLLM API Error: {str(err)}")
res = self.model_error_response()
if self.verbose:
print(f"LiteLLM API Error: {str(err)}")
raise
return hash_object, res
if self.retries:
try:
retries_config = json.loads(self.retries)
if "timeout" in retries_config:
self.request_timeout = retries_config["timeout"]
if "backoff-factor" in retries_config:
self.retry_backoff_factor = float(retries_config["backoff-factor"])
if "retry-on-unavailable" in retries_config:
self.retry_on_unavailable = bool(retries_config["retry-on-unavailable"])
if "retry-timeout" in retries_config:
self.retry_timeout = float(retries_config["retry-timeout"])
except (json.JSONDecodeError, TypeError, ValueError):
pass

kwargs["timeout"] = self.request_timeout

litellm_ex = LiteLLMExceptions()

Check failure on line 971 in cecli/models.py

View workflow job for this annotation

GitHub Actions / pre-commit

F821 undefined name 'LiteLLMExceptions'
retry_delay = 0.125

while True:
try:
if self.verbose:
dump(kwargs)
res = await litellm.acompletion(**kwargs)
return hash_object, res
except litellm.ContextWindowExceededError as err:
raise err
except litellm_ex.exceptions_tuple() as err:
ex_info = litellm_ex.get_ex_info(err)
should_retry = ex_info.retry
if ex_info.name == "ServiceUnavailableError":
should_retry = should_retry or self.retry_on_unavailable

if should_retry:
retry_delay *= self.retry_backoff_factor
if retry_delay > self.retry_timeout:
should_retry = False

# Check for non-retryable RateLimitError within ServiceUnavailableError
if (
isinstance(err, litellm.ServiceUnavailableError)
and "RateLimitError" in str(err)
and 'status_code: 429, message: "Resource has been exhausted' in str(err)
):
should_retry = False

if not should_retry:
print(f"LiteLLM API Error: {str(err)}")
if ex_info.description:
print(ex_info.description)
if stream:
return hash_object, self.model_error_response_stream()
else:
return hash_object, self.model_error_response()

print(f"Retrying in {retry_delay:.1f} seconds...")
await asyncio.sleep(retry_delay)
continue

async def simple_send_with_retries(self, messages, max_tokens=None):
from cecli.exceptions import LiteLLMExceptions
Expand Down Expand Up @@ -997,21 +1048,22 @@
except AttributeError:
return None

async def model_error_response(self):
    """Yield one placeholder response announcing a model API failure.

    Sleeps for 0.1 seconds, then yields a single ``litellm.ModelResponse``
    holding one ``stop`` choice with a fixed retry message, labelled with
    ``self.name``.
    """
    await asyncio.sleep(0.1)
    error_message = litellm.Message(
        content="Model API Response Error. Please retry the previous request"
    )
    error_choice = litellm.Choices(
        finish_reason="stop",
        index=0,
        message=error_message,
    )
    yield litellm.ModelResponse(choices=[error_choice], model=self.name)
def model_error_response(self):
    """Build the placeholder response returned when the model API call fails.

    Returns:
        A ``litellm.ModelResponse`` with a single ``stop`` choice whose
        message asks the user to retry, labelled with ``self.name``.
    """
    error_message = litellm.Message(
        content="Model API Response Error. Please retry the previous request"
    )
    error_choice = litellm.Choices(
        finish_reason="stop",
        index=0,
        message=error_message,
    )
    return litellm.ModelResponse(choices=[error_choice], model=self.name)

async def model_error_response_stream(self):
    """Async-generator form of the error placeholder.

    Yields exactly one item: whatever ``self.model_error_response()`` returns.
    """
    placeholder = self.model_error_response()
    yield placeholder


def register_models(model_settings_fnames):
Expand Down Expand Up @@ -1252,4 +1304,4 @@


if __name__ == "__main__":
main()
main()
32 changes: 31 additions & 1 deletion cecli/website/docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,35 @@ Using an `.env` file:
CECLI_DARK_MODE=true
```

{% include keys.md %}

## Retries

cecli can be configured to retry failed API calls.
This is useful for handling intermittent network issues or other transient errors.
The `retries` option is a JSON object that can be configured with the following keys:

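- `timeout`: The timeout in seconds applied to each API request.
- `retry-timeout`: The maximum retry delay in seconds; retrying stops once the backoff delay would exceed this value.
- `backoff-factor`: The factor by which the retry delay is multiplied before each retry.
- `retry-on-unavailable`: Whether to retry on 503 Service Unavailable errors.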

Example usage in `.aider.conf.yml`:

```yaml
retries:
  retry-timeout: 30
  backoff-factor: 1.50
  retry-on-unavailable: true
```

This can also be set with the `--retries` command line switch, passing a JSON string:

```
$ aider --retries '{"retry-timeout": 30, "backoff-factor": 1.50, "retry-on-unavailable": true}'
```

Or by setting the `CECLI_RETRIES` environment variable:

```
export CECLI_RETRIES='{"retry-timeout": 30, "backoff-factor": 1.50, "retry-on-unavailable": true}'
```
{% include keys.md %}
Loading