In [None]:
# Install dependencies (only needed once per environment)
!pip install requests beautifulsoup4 llama_stack llama-stack-client

In [None]:
import requests
import re
from bs4 import BeautifulSoup
from urllib.parse import urljoin

url = "https://events.afcea.org/tip25/Public/sessions.aspx?ID=115436&View=Sessions&sortMenu=102001"

# Upper bound (seconds) applied to every HTTP request so that a stalled
# server cannot hang the notebook indefinitely.
REQUEST_TIMEOUT = 30

resp = requests.get(url, timeout=REQUEST_TIMEOUT)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")


def _fetch_full_description(detail_url):
    """Download a session detail page and return its paragraph text.

    Args:
        detail_url: absolute URL of the session detail page.

    Returns:
        The non-empty ``<p>`` texts found inside ``div#SessionContainer``,
        joined by blank lines, or ``None`` when the container is missing or
        holds no paragraph text.

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
    """
    detail_resp = requests.get(detail_url, timeout=REQUEST_TIMEOUT)
    detail_resp.raise_for_status()
    detail_soup = BeautifulSoup(detail_resp.text, "html.parser")

    container = detail_soup.select_one("div#SessionContainer")
    if not container:
        return None

    # Gather all <p> elements; joining with a blank line preserves paragraphs.
    paragraphs = [p.get_text(" ", strip=True) for p in container.select("p")]
    return "\n\n".join(p for p in paragraphs if p) or None


sessions = []

# Outer loop: each time slot
for outer in soup.select("div.panel-body[datetime]"):
    session_time = outer.get("datetime")

    # Inner loop: each session under this time slot
    for panel in outer.select("div.panel.panel-default > div.panel-body"):
        # Title
        title_tag = panel.select_one("div.media-body a b")
        title = title_tag.get_text(strip=True) if title_tag else None

        # Short description
        desc_tag = panel.select_one("div[id^='SessionDescriptionForSession']")
        description = desc_tag.get_text(strip=True) if desc_tag else None

        read_more_link = desc_tag.select_one("a") if desc_tag else None
        if read_more_link and "...read more..." in read_more_link.get_text(strip=True).lower():
            onclick = read_more_link.get("onclick", "")
            # Extract the URL inside showFeatures('SessionDetails','...')
            m = re.search(r"showFeatures\('SessionDetails','([^']+)'", onclick)
            # Fetch only when THIS link yielded a URL. Otherwise `detail_url`
            # would be unbound or, worse, still hold the previous session's
            # URL and attach the wrong description to this session.
            if m:
                detail_url = urljoin(url, m.group(1))
                try:
                    full_description = _fetch_full_description(detail_url)
                except Exception as e:
                    # Best effort: keep the short description on any failure.
                    print(f"Failed to fetch full description for {title or 'Unknown'}: {e}")
                else:
                    # Replace the teaser only if the detail page gave us text.
                    if full_description:
                        description = full_description

        # Room: only the first <li> of the inline list is inspected.
        room_li = panel.select_one("ul.list-inline li")
        room = room_li.get_text(strip=True) if room_li and "Room:" in room_li.get_text() else None

        # Tracks
        tracks = [a.get_text(strip=True) for a in panel.select("ul li a")]

        # Sponsor: prefer the image's title attribute, fall back to alt.
        sponsor_img = panel.select_one("a.aa-sponsor img")
        sponsor = (sponsor_img.get("title") or sponsor_img.get("alt")) if sponsor_img else None

        # Speakers: the first non-empty <h6> is taken as the role, the second
        # as the organization.
        speakers = []
        for thumb in panel.select("div.thumbnail"):
            name_tag = thumb.select_one("h5.media-heading a")
            name = name_tag.get_text(strip=True) if name_tag else None
            h6s = [h.get_text(strip=True) for h in thumb.select("h6.media-heading") if h.get_text(strip=True)]
            role = h6s[0] if len(h6s) > 0 else None
            org = h6s[1] if len(h6s) > 1 else None
            if name:
                speakers.append({"name": name, "role": role, "organization": org})

        # Panels without a title are skipped.
        if title:
            sessions.append({
                "title": title,
                "time": session_time,
                "room": room,
                "description": description,
                "tracks": tracks,
                "sponsor": sponsor,
                "speakers": speakers
            })

print(f"Found {len(sessions)} sessions")
for s in sessions:
    print(s)

In [None]:
from llama_stack_client import LlamaStackClient

# Address of the Llama Stack server this notebook talks to.
# NOTE(review): this looks like an in-cluster service DNS name, so it probably
# only resolves from inside that cluster - confirm before running elsewhere.
LLAMA_STACK_BASE_URL = "http://lsd-llama-milvus-service.rag.svc.cluster.local:8321"

client = LlamaStackClient(base_url=LLAMA_STACK_BASE_URL)

In [None]:
# Fetch all models registered with the Llama Stack server. `models` is reused
# by the next cell to pick the LLM and the embedding model.
models = client.models.list()

# Print the raw listing so the available identifiers and types can be checked.
print(models)

In [None]:
# Pick the first registered model of each kind. A default of None plus an
# explicit check gives a readable error instead of a bare StopIteration when
# the server has no model of the required type.
model_id = next((m.identifier for m in models if m.model_type == "llm"), None)
if model_id is None:
    raise RuntimeError("No model with model_type 'llm' is registered on the Llama Stack server")

embedding_model = next((m for m in models if m.model_type == "embedding"), None)
if embedding_model is None:
    raise RuntimeError("No model with model_type 'embedding' is registered on the Llama Stack server")

# The identifier and vector size are both needed to register the vector DB.
embedding_model_id = embedding_model.identifier
embedding_dimension = embedding_model.metadata["embedding_dimension"]

In [None]:
# Name under which the vector DB is registered, and the provider backing it.
vector_db_id = "my-milvus-db"
provider_id = "milvus"

# Register the vector DB with the embedding model chosen earlier. The return
# value is not needed, so it is bound to `_`.
_ = client.vector_dbs.register(
    vector_db_id=vector_db_id,
    provider_id=provider_id,
    embedding_model=embedding_model_id,
    embedding_dimension=embedding_dimension,
)
print(f"Registered vector DB: {vector_db_id}")

In [None]:
# Print the vector DBs the server reports, to check that the registration of
# `vector_db_id` above took effect.
print(client.vector_dbs.list())

In [None]:
def session_to_text(session):
    """Render one scraped session dict as plain text suitable for embedding.

    Args:
        session: dict with the keys ``title``, ``time``, ``room``,
            ``description``, ``tracks`` (list of str), ``sponsor`` and
            ``speakers`` (list of dicts with ``name``, ``role`` and
            ``organization``). Any value may be missing, empty or ``None``.

    Returns:
        str: one ``Label: value`` line per field followed by a ``Speakers:``
        section with one ``- name (role, organization)`` bullet per speaker.
        Missing scalar values are written as ``N/A`` instead of the literal
        ``None``; a speaker's parenthesised part lists only the details that
        are present and is dropped when there are none. Lines carry no
        leading or trailing whitespace.
    """
    def _shown(value):
        # Placeholder for absent values so the text never contains "None".
        return value if value else "N/A"

    tracks = ", ".join(t for t in (session.get("tracks") or []) if t)

    lines = [
        f"Title: {_shown(session.get('title'))}",
        f"Time: {_shown(session.get('time'))}",
        f"Room: {_shown(session.get('room'))}",
        f"Description: {_shown(session.get('description'))}",
        f"Tracks: {_shown(tracks)}",
        f"Sponsor: {_shown(session.get('sponsor'))}",
    ]

    speaker_lines = []
    for sp in session.get("speakers") or []:
        name = _shown(sp.get("name"))
        # Keep only the details that were actually scraped.
        details = ", ".join(
            part for part in (sp.get("role"), sp.get("organization")) if part
        )
        speaker_lines.append(f"- {name} ({details})" if details else f"- {name}")

    if speaker_lines:
        lines.append("Speakers:")
        lines.extend(speaker_lines)
    else:
        lines.append("Speakers: N/A")

    return "\n".join(lines)


In [None]:
from llama_stack_client import RAGDocument

# Wrap every scraped session in a RAGDocument: the rendered text is the
# content to embed, and the structured fields travel along as metadata.
documents = [
    RAGDocument(
        document_id=f"session-{i}",
        content=session_to_text(session),
        mime_type="text/plain",
        metadata={
            "source": url,
            "title": session["title"],
            "time": session["time"],
            "room": session["room"],
            "tracks": session["tracks"],
            "sponsor": session["sponsor"],
        },
    )
    for i, session in enumerate(sessions, start=1)
]

# Insert in small batches so each request stays within the per-call timeout.
batch_size = 10
for start in range(0, len(documents), batch_size):
    client.tool_runtime.rag_tool.insert(
        documents=documents[start:start + batch_size],
        # Use the variable the registration and query cells use, rather than
        # repeating the literal name, so the three cannot drift apart.
        vector_db_id=vector_db_id,
        chunk_size_in_tokens=200,
        timeout=60,
    )
print("Sessions ingested successfully")

In [None]:
# Smoke-test retrieval: send one question to the RAG tool and dump the raw
# response object as returned by the client.
query = "Which sessions cover AI in the Indo-Pacific?"

result = client.tool_runtime.rag_tool.query(
    content=query,
    vector_db_ids=[vector_db_id],
)
print("Low-level query result:", result)

In [None]:
query = "Which sessions cover AI in the Indo-Pacific?"

result = client.tool_runtime.rag_tool.query(
    vector_db_ids=[vector_db_id],
    content=query,
)

print(f"Query: {query}\n")

# The code reads document ids, chunk texts and scores as three lists indexed
# in step, so walk them together instead of indexing each one by position.
hits = zip(
    result.metadata["document_ids"],
    result.metadata["chunks"],
    result.metadata["scores"],
)

# Deduplicate by document_id: only the first chunk seen for each document is
# printed, and results are numbered by how many distinct documents were shown.
seen = set()
for doc_id, chunk_text, score in hits:
    if doc_id in seen:
        continue
    seen.add(doc_id)

    print(f"Result {len(seen)} (score={score:.3f})")
    print(chunk_text.strip()[:500])  # show first ~500 chars
    print("-" * 80)