# RAG Confluence Example

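RAG example with Llama Stack: ingest the pages of one Confluence space into a vector DB, then query them through a RAG agent.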


Install required packages


In [1]:
%pip install --quiet llama-stack-client requests markdownify

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


Set configuration

In [None]:
import os, uuid

# Configuration cell: every setting is read from an environment variable, with
# the fallback shown as the second argument to os.getenv.

# --- Llama Stack base URL (same as your working sample) ---
# Any trailing "/" is stripped from the configured URL.
LLAMA_BASE_URL = os.getenv("LLAMA_BASE_URL", "http://lsd-llama-milvus-inline-service.default.svc.cluster.local:8321").rstrip("/")

# --- Confluence Cloud (Atlassian) ---
# All three default to "" and are required by the assert at the end of this cell.
CONF_CLOUD_ID   = os.getenv("CONF_CLOUD_ID", "")       # e.g. "84927973-adf1-4112-be18-59ea4f9c3d60"
CONF_USER       = os.getenv("CONF_USER", "")           # Atlassian account email
CONF_API_TOKEN  = os.getenv("CONF_API_TOKEN", "")      # Atlassian API token

# Ingest ONLY this Confluence space (by human-friendly name)
SPACE_NAME  = os.getenv("SPACE_NAME", "Known Issues").strip()

# Stable vector DB/collection name for this Confluence space
VECTOR_DB_ID = os.getenv("VECTOR_DB_ID", "conf-known-issues")

# --- Simple filters (optional) ---
# Comma-separated env values are split into lists; empty entries are dropped.
# NOTE(review): SPACE_KEYS is not referenced by the cells visible in this notebook
# (the space key is resolved from SPACE_NAME instead) - confirm whether it is still needed.
SPACE_KEYS  = [s.strip() for s in os.getenv("SPACE_KEYS", "KI").split(",") if s.strip()]
LABELS      = [s.strip() for s in os.getenv("LABELS", "").split(",") if s.strip()]
SINCE_HOURS = int(os.getenv("SINCE_HOURS", "0"))     # hours to look back; default 0 = no recency filter (set 168 for the last 7 days)

# Fail fast when required settings are missing.
assert CONF_CLOUD_ID and CONF_USER and CONF_API_TOKEN, "Please set CONF_CLOUD_ID, CONF_USER and CONF_API_TOKEN"
assert SPACE_NAME, "Please set SPACE_NAME"

Cell 3 — imports + lightweight helpers

In [3]:
from llama_stack_client import LlamaStackClient, Agent
from llama_stack_client.types import Document
import requests, uuid, re
from markdownify import markdownify as html2md
from pathlib import Path
from llama_stack_client import AgentEventLogger

# Shared Llama Stack client, pointed at the base URL from the configuration cell.
client = LlamaStackClient(base_url=LLAMA_BASE_URL)

def conf_session(user, token):
    """
    Build a requests.Session preconfigured for Confluence calls.

    The session sends an "Accept: application/json" header and uses
    (user, token) as its auth tuple on every request.
    """
    http = requests.Session()
    http.headers.update({"Accept": "application/json"})
    http.auth = (user, token)
    return http

def resolve_space_key_by_name(session, cloud_id, space_name):
    """
    Return the 'key' of the first Confluence space whose 'name' matches.

    Pages through the ``/space`` listing of the given cloud site. Names are
    compared case-insensitively, ignoring surrounding whitespace.

    Args:
        session: object with a requests-style ``get(url, params=..., timeout=...)``.
        cloud_id: Atlassian cloud id used to build the API base URL.
        space_name: human-friendly space name to look for.

    Returns:
        The matching space's ``key``, or None when no space matches.

    Raises:
        Whatever ``resp.raise_for_status()`` raises on an HTTP error.
    """
    api_base = f"https://api.atlassian.com/ex/confluence/{cloud_id}/wiki/rest/api"
    url = f"{api_base}/space"
    start = 0
    limit = 50
    name_lc = space_name.strip().lower()
    while True:
        resp = session.get(url, params={"start": start, "limit": limit}, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        results = data.get("results", [])
        if not results:
            break
        for sp in results:
            if str(sp.get("name", "")).strip().lower() == name_lc:
                return sp.get("key")
        # A short page is not proof of the last page: the server may cap the
        # page size below the requested limit. Keep going while it advertises
        # a next page; an empty page (checked above) always ends the loop.
        links = data.get("_links") or {}
        if len(results) < limit and "next" not in links:
            break
        start += len(results)
    return None

def build_cql(space_key, labels, since_hours):
    """
    Assemble the CQL query string used to search for pages.

    Always starts with ``type=page``. Optional clauses are appended in this
    order and joined with " and ":
      * a ``lastmodified`` recency clause when since_hours is a positive number,
      * a ``space="..."`` clause when space_key is truthy,
      * a parenthesised OR of ``label="..."`` terms when labels is non-empty.
    """
    recency_clause = None
    if since_hours and since_hours > 0:
        recency_clause = f'lastmodified > now("-{since_hours}h")'

    space_clause = f'space="{space_key}"' if space_key else None

    label_clause = None
    if labels:
        alternatives = " OR ".join([f'label="{name}"' for name in labels])
        label_clause = "(" + alternatives + ")"

    candidates = ("type=page", recency_clause, space_clause, label_clause)
    return " and ".join(clause for clause in candidates if clause is not None)

def conf_search_pages(session, cloud_id, cql, limit=50):
    """
    Yield every content item matching a CQL query, following pagination.

    Calls the ``/content/search`` endpoint of the given cloud site and asks for
    the rendered body, version, labels, space and last-updated history of each
    hit via ``expand``.

    Args:
        session: object with a requests-style ``get(url, params=..., timeout=...)``.
        cloud_id: Atlassian cloud id used to build the API base URL.
        cql: CQL query string.
        limit: page size requested from the server.

    Yields:
        Each entry of the response's ``results`` list, in server order.

    Raises:
        Whatever ``resp.raise_for_status()`` raises on an HTTP error.
    """
    api_base = f"https://api.atlassian.com/ex/confluence/{cloud_id}/wiki/rest/api"
    url = f"{api_base}/content/search"
    start = 0
    while True:
        resp = session.get(url, params={
            "cql": cql,
            "limit": limit,
            "start": start,
            "expand": "body.export_view,version,metadata.labels,space,history.lastUpdated",
        }, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        results = data.get("results", [])
        if not results:
            break
        yield from results
        # A short page is not proof of the last page: the server may cap the
        # page size below the requested limit (notably when bodies are
        # expanded). Keep going while it advertises a next page; an empty page
        # (checked above) always ends the loop.
        links = data.get("_links") or {}
        if len(results) < limit and "next" not in links:
            break
        start += len(results)

def html_to_markdown(html: str) -> str:
    """
    Convert an HTML fragment to Markdown and tidy its whitespace.

    The HTML (None is treated as "") is converted with markdownify, passing
    ``strip=['script', 'style']``. Afterwards trailing whitespace is removed
    from each line, runs of three or more newlines are reduced to one blank
    line, and the result is stripped at both ends.

    Returns:
        The cleaned Markdown text; "" when there is nothing left.
    """
    md = html2md(html or "", strip=['script', 'style'])
    # Remove trailing whitespace per line. The class excludes "\n" on purpose:
    # a plain r"\s+\n" also swallows newlines, which merges every paragraph
    # break into a single newline and makes the next rule unreachable.
    md = re.sub(r"[^\S\n]+\n", "\n", md)
    # Keep at most one blank line between paragraphs.
    md = re.sub(r"\n{3,}", "\n\n", md)
    return md.strip()



Cell 4 — reuse the saved vector DB if it still exists, otherwise pick an embedding model and register a new one

In [4]:
# Local file that remembers the vector DB identifier returned by the server,
# so later runs can reuse the same DB instead of registering another one.
VDB_ID_FILE = Path("conf_known_issues.vdb")

# 1) If we've saved a real ID before, reuse it (and verify it still exists)
saved_id = VDB_ID_FILE.read_text().strip() if VDB_ID_FILE.exists() else None
vector_db_id = None

if saved_id:
    try:
        # Ensure it still exists
        vdbs = list(client.vector_dbs.list())
        match = next((v for v in vdbs if getattr(v, "identifier", None) == saved_id), None)
        if match:
            vector_db_id = match.identifier
            print(f"Reusing saved vector DB: {vector_db_id}")
        else:
            print("Saved vector DB not found; will register a new one.")
    except Exception as e:
        # Best effort: a failed listing falls through to registration below.
        print("Warning: could not list vector DBs; will try register:", e)

# 2) If not found, create and save
if not vector_db_id:
    # Use the first model the server reports as an embedding model. Fail with a
    # readable message instead of a bare StopIteration when there is none.
    embed = next((m for m in client.models.list() if m.model_type == "embedding"), None)
    if embed is None:
        raise RuntimeError(
            "No embedding model is registered on the Llama Stack server; cannot register a vector DB."
        )
    vdb = client.vector_dbs.register(
        vector_db_id=VECTOR_DB_ID,   # may be ignored; server can mint its own id
        embedding_model=embed.identifier,
    )
    # Persist the identifier the server actually returned, not the requested one.
    vector_db_id = vdb.identifier
    VDB_ID_FILE.write_text(vector_db_id)
    print(f"Created vector DB: {vector_db_id} (embedding_model={embed.identifier}) and saved to {VDB_ID_FILE.name}")

vector_db_id

INFO:httpx:HTTP Request: GET http://lsd-llama-milvus-inline-service.default.svc.cluster.local:8321/v1/vector-dbs "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://lsd-llama-milvus-inline-service.default.svc.cluster.local:8321/v1/models "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://lsd-llama-milvus-inline-service.default.svc.cluster.local:8321/v1/vector-dbs "HTTP/1.1 200 OK"


Saved vector DB not found; will register a new one.
Created vector DB: vs_15c9ad54-989d-4d1b-9a81-3bbb13f25b33 (embedding_model=granite-embedding-125m) and saved to conf_known_issues.vdb


'vs_15c9ad54-989d-4d1b-9a81-3bbb13f25b33'

Cell 5 — fetch Confluence pages and prepare Documents (one per page)

Keep it simple: we let Llama Stack do chunking via chunk_size_in_tokens at insert time, so we just send whole pages as single Documents.

In [5]:
# Authenticated HTTP session for the Confluence API.
sess = conf_session(CONF_USER, CONF_API_TOKEN)

# Translate the human-friendly space name into the key that CQL needs.
space_key = resolve_space_key_by_name(sess, CONF_CLOUD_ID, SPACE_NAME)
assert space_key, f"Space named '{SPACE_NAME}' not found. Check spelling/case or your permissions."

cql = build_cql(space_key, LABELS, SINCE_HOURS)
print("SPACE_NAME:", SPACE_NAME)
print("SPACE_KEY: ", space_key)
print("CQL:       ", cql)

# One Document per Confluence page; pages with an empty body are skipped.
documents = []

for page in conf_search_pages(sess, CONF_CLOUD_ID, cql):
    pid   = page.get("id")
    title = page.get("title", "")
    body_html = (((page.get("body") or {}).get("export_view") or {}).get("value")) or ""
    # Use a separate name here: reusing `space_key` would overwrite the resolved
    # key above with the last page's value (or "") once the loop finishes.
    page_space_key = ((page.get("space") or {}).get("key")) or ""
    # NOTE(review): this is the REST API address of the page, not its browser
    # URL - confirm that is what should be cited as source_url.
    url = f"https://api.atlassian.com/ex/confluence/{CONF_CLOUD_ID}/wiki/rest/api/content/{pid}"

    md = html_to_markdown(body_html)
    if not md:
        continue

    documents.append(
        Document(
            document_id=f"conf-{pid}",
            content=md,
            mime_type="text/markdown",
            metadata={
                "source": "confluence",
                "source_url": url,
                "title": title,
                "space_key": page_space_key,
            },
        )
    )

# Derived from the list so the two cannot drift apart.
count = len(documents)

print(f"Prepared {count} Confluence pages from space '{SPACE_NAME}'")



SPACE_NAME: Known Issues
SPACE_KEY:  KI
CQL:        type=page and space="KI"
Prepared 5 Confluence pages from space 'Known Issues'


Cell 6 — insert into vector DB (server-side chunking)

In [6]:
# Push the prepared pages into the vector DB; chunking happens on the server.
if not documents:
    print("No documents to insert (check your filters/CQL).")
else:
    client.tool_runtime.rag_tool.insert(
        vector_db_id=vector_db_id,
        documents=documents,
        # Chunk size in tokens used by the server; adjust to taste:
        chunk_size_in_tokens=512,
    )
    print(f"Inserted {len(documents)} documents into {vector_db_id}")

INFO:httpx:HTTP Request: POST http://lsd-llama-milvus-inline-service.default.svc.cluster.local:8321/v1/tool-runtime/rag-tool/insert "HTTP/1.1 200 OK"


Inserted 5 documents into vs_15c9ad54-989d-4d1b-9a81-3bbb13f25b33


Cell 7 — pick an LLM and create a tiny RAG agent

In [7]:
# Choose an LLM served by vLLM (same pattern as your working sample).
# Fail with a readable message instead of a bare StopIteration when none matches.
llm = next(
    (m for m in client.models.list() if m.model_type == "llm" and m.provider_id == "vllm-inference"),
    None,
)
if llm is None:
    raise RuntimeError(
        'No model with model_type "llm" and provider_id "vllm-inference" is registered on the Llama Stack server.'
    )
model_id = llm.identifier
print("Using model:", model_id)

# Agent wired to the knowledge_search RAG tool, restricted to the vector DB
# populated by the earlier cells.
rag_agent = Agent(
    client,
    model=model_id,
    instructions=(
        "You are a helpful assistant. Use the RAG tool. "
        "Cite source_url(s) when you use retrieved info."
    ),
    tools=[{
        "name": "builtin::rag/knowledge_search",
        "args": {"vector_db_ids": [vector_db_id]}
    }],
)

# A random session name keeps each run of this cell in its own session.
session_id = rag_agent.create_session(session_name=f"s{uuid.uuid4().hex}")
session_id


INFO:httpx:HTTP Request: GET http://lsd-llama-milvus-inline-service.default.svc.cluster.local:8321/v1/models "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://lsd-llama-milvus-inline-service.default.svc.cluster.local:8321/v1/agents "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://lsd-llama-milvus-inline-service.default.svc.cluster.local:8321/v1/tools?toolgroup_id=builtin%3A%3Arag%2Fknowledge_search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://lsd-llama-milvus-inline-service.default.svc.cluster.local:8321/v1/agents/ba39652b-88cc-4d09-a25d-204756f823b9/session "HTTP/1.1 200 OK"


Using model: vllm-inference/llama-4-scout-17b-16e-w4a16


'393c2b91-3391-43f4-b9ea-4963745876f6'

Cell 8 — ask something and print a short answer with citations

In [11]:

# Questions to send to the agent, one turn each, all within the same session.
turns = ["Summarise the resolution for when Disk full on /var. Get the information from our Confluence docs."]
for t in turns:
    print("user>", t)
    # stream=True returns the turn as a stream of events rather than one final reply.
    stream = rag_agent.create_turn(
        messages=[{"role": "user", "content": t}], session_id=session_id, stream=True
    )
    # Print each event the logger produces from the stream.
    for event in AgentEventLogger().log(stream):
        event.print()

INFO:httpx:HTTP Request: POST http://lsd-llama-milvus-inline-service.default.svc.cluster.local:8321/v1/agents/ba39652b-88cc-4d09-a25d-204756f823b9/session/393c2b91-3391-43f4-b9ea-4963745876f6/turn "HTTP/1.1 200 OK"


user> Summarise the resolution for when Disk full on /var. Get the information from our Confluence docs.
[33minference> [0m[33m[0m[97m[0m
[32mtool_execution> Tool:knowledge_search Args:{'query': 'Disk full on /var resolution Confluence docs'}[0m
[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n', type='text'), TextContentItem(text='Result 1\nContent: a08ad97bfa1)\n* [Disk full on /var causes 500s & logging failures](/wiki/spaces/KI/pages/15400972/Disk+full+on+var+causes+500s+logging+failures "Known Issues")\n  Oct 21, 2025 • contributed by [Chris Renwick](/wiki/display/~70121%3A4d81fc9f-3b77-4a63-ba64-2a08ad97bfa1)\n* [Known Issues](/wiki/spaces/KI/overview "Known Issues")\n  Oct 21, 2025 • contributed by [Chris Renwick](/wiki/display/~70121%3A4d81fc9f-3b77-4a63-ba64-2a08ad97bfa1)\nMetadata: {\'chunk_id\': \'ffb8e547-ea03-0714-5d63-d8390b1c4618\', \'document_id\': \'file-8b