In [None]:
!pip install requests beautifulsoup4 llama_stack llama-stack-client selenium

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
import pandas as pd
import time

# --- Headless Chrome setup for Selenium ---
options = Options()
# Explicit path to the Chrome binary the driver should launch
options.binary_location = "/usr/bin/google-chrome-stable"
for chrome_flag in ("--headless=new", "--no-sandbox", "--disable-gpu"):
    options.add_argument(chrome_flag)

# chromedriver is expected at this fixed path (baked into the environment)
driver = webdriver.Chrome(
    options=options,
    service=Service("/usr/local/bin/chromedriver"),
)

# Walk every page of the exhibitor table and collect one dict per row in
# `exhibitors`. The browser is always shut down afterwards, even when
# navigation, parsing, or clicking raises, so no headless Chrome is leaked.
exhibitors = []

try:
    driver.get("https://tip25.myexpoonline.com/exhibitors")

    while True:
        # Fixed pause so client-side JS can render the table
        time.sleep(2)

        # Parse whatever page the browser is currently showing
        soup = BeautifulSoup(driver.page_source, "html.parser")
        table = soup.select_one("table.table.table-sm")
        if not table:
            break

        for row in table.select("tbody tr"):
            cols = row.find_all("td")
            # Rows with fewer than 7 cells cannot hold all the fields read below
            if len(cols) < 7:
                continue

            # Column 2: company name + link
            company_link = cols[1].find("a")
            company_name = company_link.get_text(strip=True) if company_link else None
            # .get() so an anchor without an href yields None instead of a
            # KeyError that would abort the whole scrape
            company_url = company_link.get("href") if company_link else None

            # Column 3: booth
            booth = cols[2].get_text(strip=True)

            # Column 4: first-time exhibitor (flagged by an <img> alt text)
            first_time_img = cols[3].find("img")
            first_time = bool(
                first_time_img
                and first_time_img.get("alt") == "First Time Exhibitor"
            )

            # Column 5: AFCEA member (flagged by an <img> alt text)
            member_img = cols[4].find("img")
            afcea_member = bool(
                member_img and member_img.get("alt") == "AFCEA Member"
            )

            # Column 7: press release (a <span title="Digital Listing"> is present)
            press_release = bool(
                cols[6].find("span", {"title": "Digital Listing"})
            )

            exhibitors.append({
                "company": company_name,
                "url": company_url,
                "booth": booth,
                "first_time_exhibitor": first_time,
                "afcea_member": afcea_member,
                "press_release": press_release,
            })

        # Advance via the pager's "next" link; no such link means we are done
        try:
            next_btn = driver.find_element(By.CSS_SELECTOR, "a.pager-right-next")
        except NoSuchElementException:
            break

        # get_attribute returns None when the element has no class attribute;
        # fall back to "" so the membership test cannot raise TypeError
        if "disabled" in (next_btn.get_attribute("class") or ""):
            break
        next_btn.click()
finally:
    driver.quit()

# Load the scraped records into a DataFrame and preview the first rows
# (the trailing expression is what the notebook cell displays)
df = pd.DataFrame(data=exhibitors)
print(f"Scraped {len(df)} exhibitors")
df.head(n=5)