In [1]:
from llama_stack_client import LlamaStackClient



In [None]:
client = LlamaStackClient(base_url="http://localhost:8321")


In [4]:
models = client.models.list()
print(models)


INFO:httpx:HTTP Request: GET http://localhost:8321/v1/models "HTTP/1.1 200 OK"


[Model(identifier='accounts/fireworks/models/llama-guard-3-11b-vision', metadata={}, api_model_type='llm', provider_id='fireworks', type='model', provider_resource_id='accounts/fireworks/models/llama-guard-3-11b-vision', model_type='llm'), Model(identifier='accounts/fireworks/models/llama-guard-3-8b', metadata={}, api_model_type='llm', provider_id='fireworks', type='model', provider_resource_id='accounts/fireworks/models/llama-guard-3-8b', model_type='llm'), Model(identifier='accounts/fireworks/models/llama-v3p1-405b-instruct', metadata={}, api_model_type='llm', provider_id='fireworks', type='model', provider_resource_id='accounts/fireworks/models/llama-v3p1-405b-instruct', model_type='llm'), Model(identifier='accounts/fireworks/models/llama-v3p1-70b-instruct', metadata={}, api_model_type='llm', provider_id='fireworks', type='model', provider_resource_id='accounts/fireworks/models/llama-v3p1-70b-instruct', model_type='llm'), Model(identifier='accounts/fireworks/models/llama-v3p1-8b-ins

In [7]:
model_id = next(m for m in models if m.model_type == "llm").identifier
print(model_id)

accounts/fireworks/models/llama-guard-3-11b-vision


In [10]:
response = client.responses.create(
    model="ollama/llama3.2:3b",
    input="Write a haiku about coding."
)

print(response.output_text)

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/responses "HTTP/1.1 200 OK"


Lines of code unfold
Logic's gentle, patient art
Beauty in the mess


In [11]:
response = client.responses.create(
    model="ollama/llama3.2:3b",
    input=[
        {
            "role": "system",
            "content": "Extract the participants from the event information.",
        },
        {
            "role": "user",
            "content": "Alice and Bob are going to a science fair on Friday.",
        },
    ],
    text={
        "format": {
            "type": "json_schema",
            "name": "participants",
            "schema": {
                "type": "object",
                "properties": {
                    "participants": {"type": "array", "items": {"type": "string"}}
                },
                "required": ["participants"],
            },
        }
    },
)
print(response.output_text)


INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/responses "HTTP/1.1 200 OK"


{
  "participants": [
    "Alice",
    "Bob"
  ]
}


In [12]:
chat_completion = client.chat.completions.create(
    model="ollama/llama3.2:3b",
    messages=[{"role": "user", "content": "Write a haiku about coding."}],
)

print(chat_completion.choices[0].message.content)


INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/chat/completions "HTTP/1.1 200 OK"


Lines of code descend
Logic flows like silent river
Mind at peaceful night


Setting up Vector DBs

In [13]:
# Create http client
import os
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")


# Register a vector db
vector_db_id = "my_documents"
response = client.vector_dbs.register(
    vector_db_id=vector_db_id,
    embedding_model="all-MiniLM-L6-v2",
    embedding_dimension=384,
    provider_id="faiss",
)

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/vector-dbs "HTTP/1.1 200 OK"


In [14]:
# You can insert a pre-chunked document directly into the vector db
chunks = [
    {
        "content": "Your document text here",
        "mime_type": "text/plain",
        "metadata": {
            "document_id": "doc1",
            "author": "Jane Doe",
        },
    },
]
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/vector-io/insert "HTTP/1.1 200 OK"


In [16]:
# You can then query for these chunks
chunks_response = client.vector_io.query(
    vector_db_id=vector_db_id, query="What do you know about..."
)
print(chunks_response)


INFO:httpx:HTTP Request: POST http://localhost:8321/v1/vector-io/query "HTTP/1.1 200 OK"


QueryChunksResponse(chunks=[Chunk(content='Your document text here', metadata={'document_id': 'doc1', 'author': 'Jane Doe'}, chunk_metadata=None, embedding=[0.007178888190537691, 0.05773353576660156, 0.0057874335907399654, 0.02382836863398552, 0.058481596410274506, -0.02291838452219963, -0.005766392685472965, 0.08020128309726715, 0.0698930025100708, 0.03262263536453247, 0.05384238809347153, 0.07083042711019516, -0.019970489665865898, -0.022517764940857887, -0.036584723740816116, 0.023153306916356087, -0.013872329145669937, -0.047751761972904205, -0.06715969741344452, 0.03943578153848648, 0.014683041721582413, 0.09099339693784714, 0.043541934341192245, -0.005874773021787405, -0.019319845363497734, 0.08646373450756073, -0.10890086740255356, 0.07165839523077011, 0.02233000285923481, -0.03300706669688225, -0.06765805929899216, 0.01010945811867714, 0.12675723433494568, 0.016023006290197372, 0.10559950023889542, -0.0205655749887228, 0.008839151822030544, -0.03406790643930435, 0.0272862501442

Agents

In [20]:
client.tools.list()

INFO:httpx:HTTP Request: GET http://localhost:8321/v1/tools "HTTP/1.1 200 OK"


[Tool(description='Insert documents into memory', identifier='insert_into_memory', parameters=[], provider_id='rag-runtime', toolgroup_id='builtin::rag', type='tool', metadata=None, provider_resource_id=None),
 Tool(description='Search for information in a database.', identifier='knowledge_search', parameters=[Parameter(description='The query to search for. Can be a natural language sentence or keywords.', name='query', parameter_type='string', required=True, default=None)], provider_id='rag-runtime', toolgroup_id='builtin::rag', type='tool', metadata=None, provider_resource_id=None),
 Tool(description='Search the web for information', identifier='web_search', parameters=[Parameter(description='The query to search for', name='query', parameter_type='string', required=True, default=None)], provider_id='tavily-search', toolgroup_id='builtin::websearch', type='tool', metadata=None, provider_resource_id=None)]

In [21]:
from llama_stack_client import Agent


# Create the agent
agent = Agent(
    client,
    model="ollama/llama3.2:3b",
    instructions="You are a helpful assistant that can use tools to answer questions.",
    tools=["builtin::rag", "builtin::websearch"],
)

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/agents "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8321/v1/tools?toolgroup_id=builtin%3A%3Arag "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8321/v1/tools?toolgroup_id=builtin%3A%3Awebsearch "HTTP/1.1 200 OK"


In [22]:
session_id = agent.create_session(session_name="My conversation")

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/agents/c1b38c12-e89a-44a7-8600-d611884db61d/session "HTTP/1.1 200 OK"


In [23]:
from llama_stack_client import AgentEventLogger

# Create a turn with streaming response
turn_response = agent.create_turn(
    session_id=session_id,
    messages=[{"role": "user", "content": "Tell me about Llama models"}],
)
for log in AgentEventLogger().log(turn_response):
    log.print()

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/agents/c1b38c12-e89a-44a7-8600-d611884db61d/session/819936b9-f5a0-4fe0-90af-cc4f56e4c280/turn "HTTP/1.1 200 OK"


[33minference> [0m[33mThe[0m[33m L[0m[33mlama[0m[33m is[0m[33m a[0m[33m large[0m[33m language[0m[33m model[0m[33m developed[0m[33m by[0m[33m Meta[0m[33m,[0m[33m designed[0m[33m to[0m[33m process[0m[33m natural[0m[33m language[0m[33m and[0m[33m generate[0m[33m human[0m[33m-like[0m[33m text[0m[33m.[0m[33m Like[0m[33m other[0m[33m models[0m[33m in[0m[33m the[0m[33m fair[0m[33mseq[0m[33m project[0m[33m,[0m[33m it[0m[33m uses[0m[33m self[0m[33m-sup[0m[33mervised[0m[33m training[0m[33m on[0m[33m a[0m[33m large[0m[33m corpus[0m[33m of[0m[33m text[0m[33m to[0m[33m learn[0m[33m patterns[0m[33m and[0m[33m relationships[0m[33m within[0m[33m language[0m[33m.[0m[97m[0m
[30m[0m

In [24]:
turn_response = agent.create_turn(
    session_id=session_id,
    messages=[{"role": "user", "content": "Tell me about Eder Ignatowicz"}],
)
for log in AgentEventLogger().log(turn_response):
    log.print()

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/agents/c1b38c12-e89a-44a7-8600-d611884db61d/session/819936b9-f5a0-4fe0-90af-cc4f56e4c280/turn "HTTP/1.1 200 OK"


[33minference> [0m[33m[[0m[33mknowledge[0m[33m_search[0m[33m(query[0m[33m="[0m[33mE[0m[33mder[0m[33m Ign[0m[33mat[0m[33mow[0m[33micz[0m[33m")][0m[97m[0m
[31m400: Invalid value: No vector DBs were provided to the knowledge search tool. Please provide at least one vector DB ID.[0m


In [25]:
client.toolgroups.register(
    toolgroup_id="builtin::wolfram_alpha", provider_id="wolfram-alpha"
)

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/toolgroups "HTTP/1.1 400 Bad Request"


BadRequestError: Error code: 400 - {'detail': 'Invalid value: Provider `wolfram-alpha` not found'}

In [28]:
# List all tools
all_tools = client.tools.list_tools()

# List tools in a specific group
group_tools = client.tools.list_tools(toolgroup_id="search_tools")

AttributeError: 'LlamaStackClient' object has no attribute 'list_tools'

In [31]:
from llama_stack_client import LlamaStackClient, Agent, AgentEventLogger

 

agent = Agent(
    client,
    model="ollama/llama3.2:3b",
    instructions="You are a helpful assistant. Use search tool to answer the questions. ",
    tools=["builtin::websearch"],
      extra_headers={
        "X-LlamaStack-Provider-Data": '{"tavily_search_api_key": "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"}'
    }
)
user_prompts = [
    "Which teams played in the NBA Western Conference Finals of 2024. Search the web for the answer.",
    "In which episode and season of South Park does Bill Cosby (BSM-471) first appear? Give me the number and title. Search the web for the answer.",
    "What is the British-American kickboxer Andrew Tate's kickboxing name? Search the web for the answer.",
]

session_id = agent.create_session("test-session")

for prompt in user_prompts:
    response = agent.create_turn(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        session_id=session_id,
    )

    for log in AgentEventLogger().log(response):
        log.print()

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/agents "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:8321/v1/tools?toolgroup_id=builtin%3A%3Awebsearch "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:8321/v1/agents/e1e77f06-4991-4578-b2fc-4f1794f3d5e0/session "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:8321/v1/agents/e1e77f06-4991-4578-b2fc-4f1794f3d5e0/session/62622c7d-b824-49f3-b2b0-2ca36208cd56/turn "HTTP/1.1 200 OK"


[33minference> [0m[36m[0m[36mbr[0m[36mave[0m[36m_search[0m[36m.call[0m[36m(query[0m[36m="[0m[36mN[0m[36mBA[0m[36m Western[0m[36m Conference[0m[36m Finals[0m[36m [0m[36m202[0m[36m4[0m[36m")[0m[97m[0m
[32mtool_execution> Tool:brave_search Args:{'query': 'NBA Western Conference Finals 2024'}[0m
[32mtool_execution> Tool:brave_search Response:{"query": "NBA Western Conference Finals 2024", "top_k": [{"url": "https://www.basketball-reference.com/playoffs/2024-nba-western-conference-finals-mavericks-vs-timberwolves.html", "title": "2024 NBA Western Conference Finals - Mavericks vs. Timberwolves", "content": "2024 NBA Western Conference Finals Mavericks vs. Timberwolves. League Champion: Boston Celtics. Finals MVP: Jaylen Brown (20.8 / 5.4 / 5.0).", "score": 0.91988057, "raw_content": null}, {"url": "https://www.nba.com/playoffs/2024/west-final", "title": "2024 Playoffs: West Finals | Timberwolves (3) vs. Mavericks (5) - NBA", "content": "The Dallas Maver

INFO:httpx:HTTP Request: POST http://localhost:8321/v1/agents/e1e77f06-4991-4578-b2fc-4f1794f3d5e0/session/62622c7d-b824-49f3-b2b0-2ca36208cd56/turn "HTTP/1.1 200 OK"


[33minference> [0m[36m[0m[36mbr[0m[36m>
[0m[36mbr[0m[36m>
[0m[36mbr[0m[36m>
[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>
[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>
[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0m[36m>

[0m[36mbr[0

KeyboardInterrupt: 

In [32]:
agents = client.agents.list()


INFO:httpx:HTTP Request: GET http://localhost:8321/v1/agents "HTTP/1.1 200 OK"


AgentListResponse(data=[{'agent_id': '205b549b-2c86-4cbd-bff7-663ada9b221f', 'agent_config': {'sampling_params': {'strategy': {'type': 'greedy'}, 'max_tokens': 0, 'repetition_penalty': 1.0, 'stop': None}, 'input_shields': [], 'output_shields': [], 'toolgroups': ['builtin::websearch'], 'client_tools': [], 'tool_choice': None, 'tool_prompt_format': None, 'tool_config': {'tool_choice': 'auto', 'tool_prompt_format': None, 'system_message_behavior': 'append'}, 'max_infer_iters': 10, 'model': 'ollama/llama3.2:3b', 'instructions': 'You are a helpful assistant. Use search tool to answer the questions. ', 'name': None, 'enable_session_persistence': False, 'response_format': None}, 'created_at': '2025-07-07T14:42:47.987090Z'}, {'agent_id': '3e67b197-3777-4a71-976b-7cc54010654e', 'agent_config': {'sampling_params': {'strategy': {'type': 'greedy'}, 'max_tokens': 0, 'repetition_penalty': 1.0, 'stop': None}, 'input_shields': [], 'output_shields': [], 'toolgroups': [{'name': 'builtin::rag/knowledge_s

In [33]:
agent.save(agent_id="test-bella")


AttributeError: 'Agent' object has no attribute 'save'