### Advanced Inferencing - Loop, Conversation History and Streaming

In [1]:
import os

from dotenv import load_dotenv

In [2]:
# Load settings from a .env file (if present) into the process environment,
# so the os.environ lookups in this notebook can find them.
load_dotenv()

def create_http_client(use_local: bool = True):
    """Build a ``LlamaStackClient`` for a local server or for Together.

    Args:
        use_local: When True (the default), connect to
            ``http://$LLAMA_STACK_SERVER_HOST:$LLAMA_STACK_SERVER_PORT``.
            When False, connect to ``$TOGETHER_URL`` and pass
            ``$TOGETHER_API_KEY`` along as provider data.

    Returns:
        The configured ``LlamaStackClient`` instance.

    Raises:
        KeyError: If an environment variable required by the selected mode
            is not set.
    """
    # Imported lazily so the dependency is only needed when a client is built.
    from llama_stack_client import LlamaStackClient

    if use_local:
        # Host/port are read only here: the remote mode must not fail just
        # because the local-server variables are missing.
        host = os.environ["LLAMA_STACK_SERVER_HOST"]
        port = os.environ["LLAMA_STACK_SERVER_PORT"]
        return LlamaStackClient(base_url=f"http://{host}:{port}")

    together_url = os.environ["TOGETHER_URL"]
    together_api_key = os.environ["TOGETHER_API_KEY"]

    return LlamaStackClient(
        base_url=together_url,
        provider_data={"together_api_key": together_api_key},
    )

# Shared client used by every inference call in the cells below.
client = create_http_client(use_local=True)

In [3]:
import asyncio
from termcolor import cprint

async def chat_loop():
    """Run an interactive, stateless chat session on the console.

    Each entered line is sent to the model on its own (no earlier turns are
    included) and the reply is printed in green. Entering ``exit``, ``quit``
    or ``bye`` (case-insensitive) prints a notice and ends the session.
    """
    farewell_words = ("exit", "quit", "bye")

    while True:
        user_text = input("Enter message: ")

        if user_text.lower() in farewell_words:
            cprint("Ending the Conversation Loop ...", "yellow")
            return

        # Only the current turn is sent; nothing is remembered between turns.
        reply = client.inference.chat_completion(
            messages=[{"role": "user", "content": user_text}],
            model_id=os.environ["INFERENCE_MODEL"],
        )
        cprint(f"Response: {reply.completion_message.content}", "green")

In [4]:
# Start the interactive loop; it runs until the user enters exit, quit or bye.
await chat_loop()

In [13]:
import asyncio
from termcolor import cprint

# Kept at module level, so the accumulated turns persist across separate
# chat_loop() runs until this cell is executed again.
conversation_history = []


async def chat_loop():
    """Run an interactive chat session that remembers earlier turns.

    Every user message and every model reply is appended to the module-level
    ``conversation_history`` list, and the whole list is sent with each
    request. Entering ``exit``, ``quit`` or ``bye`` (case-insensitive) prints
    a notice and ends the session.
    """
    while True:
        message = input("Enter message: ")

        if message.lower() in ["exit", "quit", "bye"]:
            cprint("Ending the Conversation Loop ...", "yellow")
            break

        conversation_history.append({"role": "user", "content": message})

        response = client.inference.chat_completion(
            messages=conversation_history,
            model_id=os.environ["INFERENCE_MODEL"],
        )
        reply = response.completion_message

        cprint(f"Response: {reply.content}", "green")

        # Record the reply as an *assistant* turn. Storing it as "user" would
        # present the model's own answers as user input on later requests.
        assistant_message = {"role": "assistant", "content": reply.content}

        # NOTE(review): assistant messages appear to require a stop_reason in
        # the llama-stack message schema; carry it over when the reply has one.
        stop_reason = getattr(reply, "stop_reason", None)
        if stop_reason is not None:
            assistant_message["stop_reason"] = stop_reason

        conversation_history.append(assistant_message)

In [9]:
# Start the interactive loop; it runs until the user enters exit, quit or bye.
await chat_loop()

In [11]:
from llama_stack_client.lib.inference.event_logger import EventLogger

# Single user turn used to demonstrate a streamed chat completion.
message = dict(role="user", content="write a tweet about the new iPhone 13")
cprint(f"Message: {message}", "green")

# stream=True is the only difference from the earlier, non-streaming calls.
response = client.inference.chat_completion(
    messages=[message],
    stream=True,
    model_id=os.environ["INFERENCE_MODEL"],
)

# Hand the streamed response to EventLogger and print each entry it yields.
for log in EventLogger().log(response):
    log.print()