In [1]:
# Install dependencies (only needed once per environment)
!pip install requests beautifulsoup4 llama_stack llama-stack-client

Collecting llama_stack
  Downloading llama_stack-0.3.2-py3-none-any.whl.metadata (15 kB)
Collecting llama-stack-client
  Downloading llama_stack_client-0.3.2-py3-none-any.whl.metadata (18 kB)
Collecting aiohttp (from llama_stack)
  Downloading aiohttp-3.13.2-cp312-cp312-win_amd64.whl.metadata (8.4 kB)
Collecting fastapi<1.0,>=0.115.0 (from llama_stack)
  Downloading fastapi-0.121.2-py3-none-any.whl.metadata (28 kB)
Collecting fire (from llama_stack)
  Using cached fire-0.7.1-py3-none-any.whl.metadata (5.8 kB)
Collecting openai>=1.107 (from llama_stack)
  Downloading openai-2.8.0-py3-none-any.whl.metadata (29 kB)
Collecting python-dotenv (from llama_stack)
  Downloading python_dotenv-1.2.1-py3-none-any.whl.metadata (25 kB)
Collecting pyjwt>=2.10.0 (from pyjwt[crypto]>=2.10.0->llama_stack)
  Downloading PyJWT-2.10.1-py3-none-any.whl.metadata (4.0 kB)
Collecting pydantic>=2.11.9 (from llama_stack)
  Downloading pydantic-2.12.4-py3-none-any.whl.metadata (89 kB)
Collecting rich (from llama_


[notice] A new release of pip is available: 24.3.1 -> 25.3
[notice] To update, run: C:\Users\cmays\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip


In [1]:
import re
from bs4 import BeautifulSoup

# Location of the saved agenda page (relative to the notebook's working directory).
html_file_path = "agenda.html"

# Load the whole page as UTF-8 text.
with open(html_file_path, encoding="utf-8") as agenda_file:
    html_content = agenda_file.read()

# Parse the markup with the built-in HTML parser and collect every panel container.
soup = BeautifulSoup(html_content, "html.parser")
panels = soup.select("div.panel.panel-default")

# Accumulator for the session records produced by the parsing loop.
sessions = []

# Walk every panel and every agenda item inside it. Each item that ends up with
# a title is appended to `sessions` as a dict with the keys: title, day, time,
# room, description, tracks, sponsor, speakers.
for panel in panels:
    # Extract the day from the panel heading (stays None when no heading link exists)
    day = None
    panel_heading = panel.select_one("div.panel-heading h4.panel-title a")
    if panel_heading:
        day = panel_heading.get_text(strip=True)

    # Find all agenda items within this panel; a panel without a
    # div.panel-collapse body is skipped entirely.
    panel_body = panel.select_one("div.panel-collapse")
    if not panel_body:
        continue

    agenda_items = panel_body.select("div.agendaItem.col-sm-12")

    for item in agenda_items:
        # Time section (left side - col-sm-2)
        time_section = item.select_one("div.agendaTime.col-sm-2")

        # Defaults used when the time section or one of its parts is missing
        session_time = None
        room = None
        track_tags = []

        if time_section:
            time_tag = time_section.select_one("div.agendaTime02")
            session_time = time_tag.get_text(strip=True) if time_tag else None

            loc_tag = time_section.select_one("div.agendaLocation02")
            room = loc_tag.get_text(strip=True) if loc_tag else None

            # Track tags: any div (at any depth) with a class starting with
            # "agendaTag" contributes its non-empty text.
            # NOTE(review): a div with several matching classes is appended
            # once per matching class.
            for div in time_section.find_all("div"):
                if div.get("class"):
                    for class_name in div.get("class"):
                        if class_name.startswith("agendaTag"):
                            track_text = div.get_text(strip=True)
                            if track_text:
                                track_tags.append(track_text)

        # Content section (right side - col-sm-10)
        content_section = item.select_one("div.agendaItem.col-sm-10")

        # Defaults used when the content section or one of its parts is missing
        title = None
        subtitle = None
        present_text = ""
        speakers = []

        if content_section:
            # Get all trackTitle divs
            track_titles = content_section.select("div.trackTitle")

            # Prefer the first trackTitle div that has no class starting with
            # "tag" (presumably the tagged ones are track-category labels —
            # verify against the page); otherwise fall back to the first
            # trackTitle div.
            if track_titles:
                actual_titles = [t for t in track_titles if not any(c.startswith('tag') for c in t.get('class', []))]
                if actual_titles:
                    title = actual_titles[0].get_text(strip=True)
                # This condition is always true here: we are inside `if track_titles`.
                elif track_titles:
                    title = track_titles[0].get_text(strip=True)

            # Subtitle
            subtitle_tag = content_section.select_one("div.trackSubTitle")
            subtitle = subtitle_tag.get_text(strip=True) if subtitle_tag else None

            # Description/presenter info; nested text pieces are joined with newlines
            present_tag = content_section.select_one("div.agendaPresent")
            if present_tag:
                present_text = present_tag.get_text("\n", strip=True)

            # Extract speakers from every <p> inside the content section
            speaker_paragraphs = content_section.select("p")
            for p in speaker_paragraphs:
                # Get text with line breaks preserved
                speaker_text = p.get_text("\n", strip=True)

                # Split by line breaks to get individual speakers (blank lines dropped)
                speaker_lines = [line.strip() for line in speaker_text.split('\n') if line.strip()]

                for line in speaker_lines:
                    # Skip "Panelist..." / "Moderator..." label lines. The
                    # `not line` test cannot trigger because empty lines were
                    # already filtered out above.
                    if not line or line.lower().startswith('panelist') or line.lower().startswith('moderator'):
                        continue

                    # Check for confirmation status
                    confirmed = False
                    invited = False

                    # Look for (confirmed) or (invited) at the end (case-insensitive)
                    if re.search(r'\(confirmed\)\s*$', line, re.IGNORECASE):
                        confirmed = True
                    elif re.search(r'\(invited\)\s*$', line, re.IGNORECASE):
                        invited = True

                    # Remove the trailing marker for parsing.
                    # NOTE(review): this strips ANY trailing "(...)" group, not
                    # only (confirmed)/(invited).
                    clean_line = re.sub(r'\s*\([^)]*\)\s*$', '', line)

                    # Split on the first " - " to separate name from title/org.
                    # Lines without " - " are not recorded as speakers.
                    if ' - ' in clean_line:
                        parts = clean_line.split(' - ', 1)
                        name = parts[0].strip()

                        # Everything after the first " - ". The length check is
                        # always true because the separator is known to be present.
                        title_org = parts[1].strip() if len(parts) > 1 else ""

                        # Split on the first comma: left part is the role, the
                        # remainder is the organization.
                        if ',' in title_org:
                            role_parts = title_org.split(',', 1)
                            role = role_parts[0].strip()
                            organization = role_parts[1].strip() if len(role_parts) > 1 else ""
                        else:
                            role = title_org
                            organization = ""

                        speakers.append({
                            "name": name,
                            "role": role,
                            "organization": organization,
                            "confirmed": confirmed,
                            "invited": invited
                        })

        # Build description from subtitle + agendaPresent, separated by a blank
        # line. `parts` is rebound here; it no longer refers to the speaker split.
        parts = []
        if subtitle:
            parts.append(subtitle)
        if present_text:
            parts.append(present_text)
        description = "\n\n".join(parts) if parts else None

        # Extract sponsor from agendaPresent or from links
        sponsor = None
        if content_section:
            present_tag = content_section.select_one("div.agendaPresent")
            if present_tag:
                # Check for "Sponsored By" text: captures the rest of the line
                # that follows the label, an optional ':' or '-', and any
                # whitespace (which may include a line break).
                m = re.search(r"Sponsored\s+By[:\-]?\s*(.+)", present_text, flags=re.IGNORECASE)
                if m:
                    sponsor = m.group(1).strip()
                else:
                    # Otherwise use the text of the first link in agendaPresent
                    sponsor_link = present_tag.find("a")
                    if sponsor_link:
                        sponsor = sponsor_link.get_text(strip=True)

        # Use track tags found in the time section (same list object, not a copy)
        tracks = track_tags

        # Only add sessions that have a title
        if title:
            sessions.append({
                "title": title,
                "day": day,
                "time": session_time,
                "room": room,
                "description": description,
                "tracks": tracks,
                "sponsor": sponsor,
                "speakers": speakers,
            })

# Sanity check: report how many sessions were parsed, then preview only the
# first 10 so the cell output stays short and readable.
print(f"Found {len(sessions)} sessions")
for s in sessions[:10]:
    print(s)

Found 85 sessions
{'title': 'Squadron Commanders Course (Invitation Only)', 'day': 'Monday 1 December 2025', 'time': '0800 - 1700', 'room': 'Wisteria/Sunflower', 'description': None, 'tracks': ['Human Capital'], 'sponsor': None, 'speakers': []}
{'title': '19th Hole Social', 'day': 'Monday 1 December 2025', 'time': '1400 - 1600', 'room': 'Riverbend Pavilion', 'description': 'Sponsored By:\nSalesForce', 'tracks': [], 'sponsor': 'SalesForce', 'speakers': []}
{'title': 'Registration', 'day': 'Tuesday 2 December 2025', 'time': '0800 - 1700', 'room': '2nd Floor Ballroom Level', 'description': 'Sponsored By:\nPeraton', 'tracks': [], 'sponsor': 'Peraton', 'speakers': []}
{'title': 'XCOMM Executive Council (Invitation Only)', 'day': 'Tuesday 2 December 2025', 'time': '0800 - 1700', 'room': 'Sunflower', 'description': None, 'tracks': ['Human Capital'], 'sponsor': None, 'speakers': []}
{'title': 'Squadron Commanders Course (Invitation Only)', 'day': 'Tuesday 2 December 2025', 'time': '0800 - 1700

In [None]:
from llama_stack_client import LlamaStackClient

# Endpoint of the Llama Stack server that every later cell talks to.
llama_stack_url = "http://lsd-llama-milvus-service.rag.svc.cluster.local:8321"
client = LlamaStackClient(base_url=llama_stack_url)

In [None]:
# Fetch all registered models from the server. The result is kept in `models`
# because the model-selection cell iterates over it and reads each entry's
# `model_type` and `identifier`.
models = client.models.list()

# Print the raw listing for inspection.
print(models)

In [None]:
# Pick the first registered LLM and the first registered embedding model.
# next() is given a default so that a missing model type produces a readable
# error instead of a bare StopIteration traceback.
llm_model = next((m for m in models if m.model_type == "llm"), None)
if llm_model is None:
    raise RuntimeError("No model with model_type 'llm' is registered on the server")
model_id = llm_model.identifier

embedding_model = next((m for m in models if m.model_type == "embedding"), None)
if embedding_model is None:
    raise RuntimeError("No model with model_type 'embedding' is registered on the server")
embedding_model_id = embedding_model.identifier
# The vector DB registration needs the embedding size, read from the model metadata.
embedding_dimension = embedding_model.metadata["embedding_dimension"]

In [None]:
# Name of the vector DB to create and the provider that backs it.
vector_db_id = "my-milvus-db"
provider_id = "milvus"

# Register the DB using the embedding model chosen earlier; the return value is
# not needed, so it is bound to the throwaway name `_`.
_ = client.vector_dbs.register(
    vector_db_id=vector_db_id,
    embedding_model=embedding_model_id,
    embedding_dimension=embedding_dimension,
    provider_id=provider_id,
)
print(f"Registered vector DB: {vector_db_id}")

In [None]:
# Resolve the identifier of the vector DB that the ingestion and query cells use.
dbs = client.vector_dbs.list()
if not dbs:
    # Indexing an empty list would only give an unexplained IndexError.
    raise RuntimeError("No vector DBs are registered; run the registration cell first")

# Prefer the DB whose identifier or name matches `vector_db_id`, so that an
# unrelated DB on a shared server is not picked by accident. Fall back to the
# first entry (the previous behaviour) when nothing matches.
matching_db = next(
    (
        candidate
        for candidate in dbs
        if vector_db_id in (candidate.identifier, getattr(candidate, "vector_db_name", None))
    ),
    dbs[0],
)
identifier = matching_db.identifier
print(identifier)

In [None]:
def session_to_text(session):
    """Render one parsed agenda session as plain text for embedding.

    Args:
        session: dict with a required "title" key and optional "day", "time",
            "room", "description", "tracks" (list of str), "sponsor" and
            "speakers" (list of dicts with "name" and optional "role" /
            "organization") keys.

    Returns:
        str: one "Label: value" line per populated field, followed by a
        "Speakers:" section when the session has speakers. Fields that are
        None or empty are left out, so the text never contains a literal
        "None" placeholder.
    """

    def _speaker_line(speaker):
        # Only mention role/organization when present, avoiding output such as "(CIO, )".
        affiliation = ", ".join(
            part for part in (speaker.get("role"), speaker.get("organization")) if part
        )
        name = speaker["name"]
        return f"- {name} ({affiliation})" if affiliation else f"- {name}"

    # The day is included so that a session repeated on several days does not
    # produce identical text for each occurrence.
    fields = [
        ("Title", session["title"]),
        ("Day", session.get("day")),
        ("Time", session.get("time")),
        ("Room", session.get("room")),
        ("Description", session.get("description")),
        ("Tracks", ", ".join(session.get("tracks") or [])),
        ("Sponsor", session.get("sponsor")),
    ]
    lines = [f"{label}: {value}" for label, value in fields if value]

    speakers = session.get("speakers") or []
    if speakers:
        lines.append("Speakers:")
        lines.extend(_speaker_line(speaker) for speaker in speakers)

    # No leading indentation: the text is chunked and embedded, so padding is noise.
    return "\n".join(lines)


In [None]:
from llama_stack_client import RAGDocument

# Build one RAGDocument per parsed session. The metadata includes the day so
# that a session repeated on several days can be told apart after retrieval.
documents = [
    RAGDocument(
        document_id=f"session-{i}",
        content=session_to_text(session),
        mime_type="text/plain",
        metadata={
            "source": "https://www.alamoafcea.org/mpage/2025exhibitors",
            "type": "session",
            "title": session["title"],
            "day": session.get("day"),
            "time": session["time"],
            "room": session["room"],
            "tracks": session["tracks"],
            "sponsor": session["sponsor"],
        },
    )
    for i, session in enumerate(sessions, start=1)
]

# Insert in small batches so that each request stays within the 60 second timeout.
batch_size = 10
for start in range(0, len(documents), batch_size):
    batch = documents[start:start + batch_size]
    client.tool_runtime.rag_tool.insert(
        documents=batch,
        vector_db_id=identifier,
        chunk_size_in_tokens=200,
        timeout=60,
    )
print("Sessions ingested successfully")

In [None]:
# Run one retrieval query against the ingested vector DB and dump the raw
# response object for inspection.
query = "Which sessions cover AI in the Indo-Pacific?"
result = client.tool_runtime.rag_tool.query(content=query, vector_db_ids=[identifier])
print("Low-level query result:", result)

In [None]:
query = "Which sessions cover AI in the Indo-Pacific?"
result = client.tool_runtime.rag_tool.query(content=query, vector_db_ids=[identifier])

print(f"Query: {query}\n")

# Print only the first returned chunk of each document: the document ids,
# chunks and scores are read position by position from the result metadata,
# and any document id that was already shown is skipped.
hit_metadata = result.metadata
seen = set()
for i, doc_id in enumerate(hit_metadata["document_ids"]):
    chunk_text = hit_metadata["chunks"][i]
    score = hit_metadata["scores"][i]

    if doc_id not in seen:
        seen.add(doc_id)

        # len(seen) doubles as the running result number.
        print(f"Result {len(seen)} (score={score:.3f})")
        print(chunk_text.strip()[:500])  # at most the first 500 characters
        print("-" * 80)

In [None]:
# Cleanup: unregister every vector DB the server currently lists.
dbs = client.vector_dbs.list()

if not dbs:
    print("No vector DBs found.")

# Best effort: a failure on one DB is reported and the loop moves on to the next.
for db in dbs:
    try:
        print(f"Unregistering {db.vector_db_name} ({db.identifier}) ...")
        client.vector_dbs.unregister(db.identifier)
        print(f"✔ Deleted {db.identifier}")
    except Exception as exc:
        print(f"⚠ Failed to delete {db.identifier}: {exc}")