### Runtime setup
The following environment variables enable stable retries and quiet streaming.

- `SCILLM_DISABLE_AIOHTTP=1` (httpx-only async stability)
- `SCILLM_FORCE_HTTPX_STREAM=1`
- `LITELLM_MAX_RETRIES=3`, `LITELLM_RETRY_AFTER=1`, `LITELLM_TIMEOUT=45`
- Requires `tenacity` to be installed for retry backoff.

In [None]:
import os

# Runtime defaults for this notebook. setdefault leaves any value that is
# already present in the environment untouched, so user overrides win.
for _env_name, _env_default in (
    ('SCILLM_DISABLE_AIOHTTP', '1'),
    ('SCILLM_FORCE_HTTPX_STREAM', '1'),
    ('LITELLM_MAX_RETRIES', '3'),
    ('LITELLM_RETRY_AFTER', '1'),
    ('LITELLM_TIMEOUT', '45'),
):
    os.environ.setdefault(_env_name, _env_default)

# Probe for tenacity and report the result; a failed import only prints a
# hint and never aborts the cell.
try:
    import tenacity  # noqa: F401
except Exception:
    print('tenacity missing — run: pip install tenacity')
else:
    print('tenacity: ok')


# Router.parallel_acompletions

In [None]:
import asyncio
import os

import nest_asyncio

# Patch asyncio before anything else touches the loop so that
# run_until_complete below can be called while a loop is already running
# (as is the case inside a notebook kernel).
nest_asyncio.apply()

from scillm import Router

# Connection settings are read from the environment; a missing variable
# raises KeyError here rather than failing later inside a request.
router = Router(default_litellm_params={
    'api_base': os.environ['CHUTES_API_BASE'],
    'api_key': os.environ['CHUTES_API_KEY'],
    'custom_llm_provider': 'openai_like',
})

prompts = ['OK-A', 'OK-B', 'OK-C']

# One request dict per prompt, all sent to the same model.
reqs = [
    {
        'model': os.environ['CHUTES_MODEL'],
        'messages': [{'role': 'user', 'content': p}],
        'kwargs': {'max_tokens': 8, 'temperature': 0, 'timeout': 20},
    }
    for p in prompts
]


def _first_content(resp):
    """Return the stripped text of the first choice in *resp*.

    Falls back to an empty string when ``choices`` is missing, empty or
    null, when the first choice has no ``message``, or when ``content`` is
    missing or null, so one degenerate response cannot abort the printout.
    """
    choices = resp.get('choices') or [{}]
    message = choices[0].get('message') or {}
    return (message.get('content') or '').strip()


async def run():
    """Send all requests with a concurrency of 2 and print each reply text."""
    outs = await router.parallel_acompletions(requests=reqs, concurrency=2)
    print([_first_content(o) for o in outs])


loop = asyncio.get_event_loop()
loop.run_until_complete(run())

### Router + Fallbacks (Text & VLM) — Recommended
Batch/parallel paths work with Router too. Define one router per modality (text, VLM) and call the matching router from your batch code.

In [None]:
import os
from litellm import Router


def _vlm_deployment(model_env, order):
    """Build one 'chutes/vlm' deployment whose model name is read from *model_env*.

    The model falls back to an empty string when the variable is unset, while
    the API base and key are required and raise KeyError when missing.
    """
    return {
        "model_name": "chutes/vlm",
        "litellm_params": {
            "custom_llm_provider": "openai_like",
            "model": os.environ.get(model_env, ''),
            "api_base": os.environ['CHUTES_API_BASE'],
            "api_key": os.environ['CHUTES_API_KEY'],
            # NOTE(review): presumably the lower order is tried first and the
            # higher one acts as the fallback — confirm against the Router docs.
            "order": order,
        },
    }


# Both deployments share the same public model name, so callers only ever
# address 'chutes/vlm'.
router_vlm = Router(model_list=[
    _vlm_deployment('CHUTES_VLM_MODEL', 1),
    _vlm_deployment('CHUTES_VLM_MODEL_ALT1', 2),
])

# Single text part asking for a JSON object reply.
_prompt_parts = [{"type": "text", "text": 'Return only {\"ok\": true} as JSON.'}]

out = router_vlm.completion(
    model='chutes/vlm',
    messages=[{"role": "user", "content": _prompt_parts}],
    response_format={"type": "json_object"},
)

_first_message = out.choices[0].message
print(_first_message.get('content', ''))
