### Runtime setup
The following environment variables enable stable retries and quiet streaming.

- `SCILLM_FORCE_HTTPX_STREAM=1`
- `LITELLM_MAX_RETRIES=3`, `LITELLM_RETRY_AFTER=1`, `LITELLM_TIMEOUT=45`
- Requires `tenacity` installed for backoff.

In [1]:
import os

# Seed the retry/streaming settings, leaving alone any variable that is
# already present in the environment.
os.environ.update({
    name: value
    for name, value in (
        ('SCILLM_FORCE_HTTPX_STREAM', '1'),
        ('LITELLM_MAX_RETRIES', '3'),
        ('LITELLM_RETRY_AFTER', '1'),
        ('LITELLM_TIMEOUT', '45'),
    )
    if name not in os.environ
})

# Report whether tenacity can be imported; a failed import only prints a hint.
try:
    import tenacity  # noqa: F401
except Exception:
    print('tenacity missing — run: pip install tenacity')
else:
    print('tenacity: ok')


tenacity: ok


# LiteLLM Provider — Perplexity

Demonstrates calling a LiteLLM-native provider (Perplexity) via SciLLM.
Requires `PERPLEXITY_API_KEY`. If `PERPLEXITY_MODEL` is unset, it defaults to `sonar`.

In [2]:
import os
from scillm import completion

# Model name and API key are read from the environment; the model falls
# back to 'sonar' when PERPLEXITY_MODEL is unset.
model = os.environ.get('PERPLEXITY_MODEL','sonar')
key = os.environ.get('PERPLEXITY_API_KEY','')
if not key:
  # Without a key, skip the network call so the cell still runs cleanly.
  print('PERPLEXITY_API_KEY not set — skipping live call')
else:
  resp = completion(
    model=model,
    custom_llm_provider='perplexity',
    api_key=key,
    messages=[{'role':'user','content':'In one word, say OK'}],
    max_tokens=8,
    temperature=0,
    timeout=45,  # bound the live call so a stalled request cannot hang the cell
  )
  # Print the text of the first choice ('' if the message has no content key).
  print(resp.choices[0].message.get('content',''))

OK


## 2) Async acompletion

Preferred for live apps and notebooks with other async work.

In [3]:
import asyncio
import os

import scillm

# Model name and API key are read from the environment; the model falls
# back to 'sonar' when PERPLEXITY_MODEL is unset.
model = os.environ.get('PERPLEXITY_MODEL','sonar')
key = os.environ.get('PERPLEXITY_API_KEY','')
if not key:
  # Without a key, skip the network call so the cell still runs cleanly.
  print('PERPLEXITY_API_KEY not set — skipping live call')
else:
  async def main():
    """Send one prompt through scillm.acompletion and print the reply text."""
    resp = await scillm.acompletion(
      model=model,
      custom_llm_provider='perplexity',
      api_key=key,
      messages=[{'role':'user','content':'In one word, say OK'}],
      max_tokens=8,
      temperature=0,
      timeout=45,
    )
    print(resp.choices[0].message.get('content',''))

  # nest_asyncio patches asyncio so asyncio.run can be used while the
  # notebook's own event loop is already running; this replaces the
  # deprecated get_event_loop().run_until_complete(...) pattern.
  import nest_asyncio
  nest_asyncio.apply()
  asyncio.run(main())

OK.


## 3) Router.parallel_acompletions (batch of 3)

Fan out three requests concurrently. Always set `timeout` in `kwargs`. The sample replies below are cut off mid-sentence because each request sets `max_tokens` to 8.

In [4]:
import asyncio
import os

import scillm

# Model name and API key are read from the environment; the model falls
# back to 'sonar' when PERPLEXITY_MODEL is unset.
model = os.environ.get('PERPLEXITY_MODEL','sonar')
key = os.environ.get('PERPLEXITY_API_KEY','')
if not key:
  # Without a key, skip the network call so the cell still runs cleanly.
  print('PERPLEXITY_API_KEY not set — skipping live call')
else:
  # Router with a single entry: the name 'ppx' maps to the Perplexity model.
  router = scillm.Router(model_list=[{
    'model_name': 'ppx',
    'litellm_params': {
      'custom_llm_provider': 'perplexity',
      'model': model,
      'api_key': key,
    }
  }])
  prompts = ['Say OK-A','Say OK-B','Say OK-C']
  # One request per prompt; per-request options (including timeout) go in 'kwargs'.
  reqs = [{
    'model': 'ppx',
    'messages': [{'role':'user','content': p}],
    'kwargs': {'max_tokens': 8, 'temperature': 0, 'timeout': 30}
  } for p in prompts]

  async def run():
    """Submit all requests via router.parallel_acompletions and print each reply's text."""
    outs = await router.parallel_acompletions(requests=reqs, concurrency=3)
    # `or [{}]` also covers a present-but-empty 'choices' list, which would
    # otherwise raise IndexError on [0].
    print([
      (o.get('choices') or [{}])[0].get('message',{}).get('content','')
      for o in outs
    ])

  # nest_asyncio patches asyncio so asyncio.run can be used while the
  # notebook's own event loop is already running; this replaces the
  # deprecated get_event_loop().run_until_complete(...) pattern.
  import nest_asyncio
  nest_asyncio.apply()
  asyncio.run(run())

['**OK-A** is not a standar', 'When you say "OK-B," it', '## Meaning of "OK-C"\n\n**']
