### Switching the Llama Stack Server Between Local and Remote

In [1]:
import os
import sys

from dotenv import load_dotenv

In [4]:
# Populate os.environ from a .env file (python-dotenv) so that the
# os.environ[...] lookups in the following cells can find their settings.
load_dotenv() 

True

In [6]:
from llama_stack_client import LlamaStackClient

# Address of the local Llama Stack server. Each lookup raises KeyError when
# the variable is absent from the environment.
LLAMA_STACK_SERVER_HOST = os.environ["LLAMA_STACK_SERVER_HOST"]
LLAMA_STACK_SERVER_PORT = os.environ["LLAMA_STACK_SERVER_PORT"]

# Address of the remote Llama Stack server.
LLAMA_STACK_SERVER_REMOTE_HOST = os.environ["LLAMA_STACK_SERVER_REMOTE_HOST"]
LLAMA_STACK_SERVER_REMOTE_PORT = os.environ["LLAMA_STACK_SERVER_REMOTE_PORT"]

# Both clients are created up front; select_client() later chooses between them.
_local_base_url = "http://{}:{}".format(LLAMA_STACK_SERVER_HOST, LLAMA_STACK_SERVER_PORT)
local_client = LlamaStackClient(base_url=_local_base_url)

_remote_base_url = "http://{}:{}".format(
    LLAMA_STACK_SERVER_REMOTE_HOST, LLAMA_STACK_SERVER_REMOTE_PORT
)
cloud_client = LlamaStackClient(base_url=_remote_base_url)

In [7]:
import httpx
from termcolor import cprint

async def select_client(use_local: bool) -> LlamaStackClient:
    """Return one of the two module-level Llama Stack clients.

    Args:
        use_local: When truthy, return ``local_client``; otherwise return
            ``cloud_client``.

    Returns:
        The selected ``LlamaStackClient`` instance. No connectivity or
        availability check is performed on either server.
    """
    # Kept as a coroutine only so existing ``await select_client(...)`` call
    # sites keep working; nothing in here awaits.
    return local_client if use_local else cloud_client

# Top-level ``await`` as used in a notebook cell. This module-level ``client`` is the
# remote-server client; get_llama_response() selects its own client via select_client().
client = await select_client(use_local=False)

In [8]:
from termcolor import cprint
from llama_stack_client.lib.inference.event_logger import EventLogger

# Model identifier passed as ``model_id`` to chat_completion below.
# Raises KeyError if INFERENCE_MODEL is not set in the environment.
INFERENCE_MODEL = os.environ["INFERENCE_MODEL"]

async def get_llama_response(stream: bool = True, use_local: bool = True):
    """Send a fixed moon-poem prompt to the chosen server and print the reply.

    Args:
        stream: When true, the response is passed through ``EventLogger`` and
            each logged event is printed as it is produced; otherwise the
            completed message content is printed in a single call.
        use_local: Forwarded to ``select_client`` to pick the local or the
            remote client.
    """
    llama_client = await select_client(use_local)

    prompt = 'hello world, write me a 2 sentence poem about the moon'
    cprint(f'User> {prompt}', 'green')

    response = llama_client.inference.chat_completion(
        messages=[{"role": "user", "content": prompt}],
        model_id=INFERENCE_MODEL,
        stream=stream,
    )

    # Non-streaming: the full completion is already available on the response.
    if not stream:
        cprint(f'> Response: {response.completion_message.content}', 'cyan')
        return

    # Streaming: let EventLogger turn the response into printable events.
    async for event in EventLogger().log(response):
        event.print()

In [9]:
import asyncio

# In a Jupyter Notebook cell, call the coroutine directly with top-level `await`.
# This run targets the remote server and prints the whole reply at once (stream=False).
await get_llama_response(stream=False, use_local=False)

# To run it in a plain Python file, use this line instead:
# asyncio.run(get_llama_response(stream=False, use_local=False))

[32mUser> hello world, write me a 2 sentence poem about the moon[0m
[36m> Response: Here is a 2-sentence poem about the moon:

The moon glows bright in the midnight sky,
A silver crescent, catching the eye.[0m


In [10]:
import asyncio

# In a Jupyter Notebook cell, call the coroutine directly with top-level `await`.
# This run targets the local server and prints the whole reply at once (stream=False).
await get_llama_response(stream=False, use_local=True)

# To run it in a plain Python file, use this line instead:
# asyncio.run(get_llama_response(stream=False, use_local=True))

[32mUser> hello world, write me a 2 sentence poem about the moon[0m
[36m> Response: Here is a 2-sentence poem about the moon:

The moon glows bright in the midnight sky,
A silver crescent, catching the eye.[0m
