In [11]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time

chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")

driver = webdriver.Chrome(options=chrome_options)
wait = WebDriverWait(driver, 10)

all_data = []

# Start from page 1
page = 1
while True:
    url = f"https://www.bedford.k12.mi.us/about/directory?const_page={page}&"
    driver.get(url)
    
    # Wait until the cards load
    try:
        wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "fsConstituentItem")))
    except:
        print(f"No more pages after page {page-1}.")
        break

    soup = BeautifulSoup(driver.page_source, 'lxml')

    cards = soup.find_all("div", class_="fsConstituentItem")
    if not cards:
        print(f"No more cards found at page {page}. Ending scrape.")
        break

    for card in cards:
        name = card.find("h3", class_="fsFullName")
        title = card.find("div", class_="fsTitles")
        dept = card.find("div", class_="fsDepartments")
        email = card.find("div", class_="fsEmail")
        phone = card.find("div", class_="fsPhones")

        all_data.append({
            "Name": name.get_text(strip=True) if name else "",
            "Title": title.get_text(separator=" ", strip=True).replace("Titles:", "") if title else "",
            "Department": dept.get_text(separator=" ", strip=True).replace("Departments:", "") if dept else "",
            "Email": email.a.get_text(strip=True) if email and email.a else "",
            "Phone": phone.a.get_text(strip=True) if phone and phone.a else ""
        })

    print(f"Page {page} scraped, {len(cards)} records found.")
    page += 1
    time.sleep(1)  # small pause to avoid server overload

# Save to CSV
pd.set_option('display.max_rows', None)

df = pd.DataFrame(all_data)

from IPython.display import display
display(df)
driver.quit()


Page 1 scraped, 6 records found.
Page 2 scraped, 6 records found.
Page 3 scraped, 6 records found.
Page 4 scraped, 6 records found.
Page 5 scraped, 6 records found.
Page 6 scraped, 6 records found.
Page 7 scraped, 6 records found.
Page 8 scraped, 6 records found.
Page 9 scraped, 6 records found.
Page 10 scraped, 6 records found.
Page 11 scraped, 6 records found.
Page 12 scraped, 6 records found.
Page 13 scraped, 6 records found.
Page 14 scraped, 6 records found.


WebDriverException: Message: tab crashed
  (Session info: chrome=140.0.7339.129)
Stacktrace:
	GetHandleVerifier [0x0x7ff60dfe30f5+79493]
	GetHandleVerifier [0x0x7ff60dfe3150+79584]
	(No symbol) [0x0x7ff60dd5ffdc]
	(No symbol) [0x0x7ff60dd4d3fc]
	(No symbol) [0x0x7ff60dd4afcc]
	(No symbol) [0x0x7ff60dd4ba4b]
	(No symbol) [0x0x7ff60dd5a93e]
	(No symbol) [0x0x7ff60dd71490]
	(No symbol) [0x0x7ff60de08d85]
	(No symbol) [0x0x7ff60dde04e3]
	(No symbol) [0x0x7ff60dda8e92]
	(No symbol) [0x0x7ff60dda9c63]
	GetHandleVerifier [0x0x7ff60e2a0dbd+2954061]
	GetHandleVerifier [0x0x7ff60e29b02a+2930106]
	GetHandleVerifier [0x0x7ff60e2bb357+3061991]
	GetHandleVerifier [0x0x7ff60dffd60e+187294]
	GetHandleVerifier [0x0x7ff60e00557f+219919]
	GetHandleVerifier [0x0x7ff60dfec294+116772]
	GetHandleVerifier [0x0x7ff60dfec449+117209]
	GetHandleVerifier [0x0x7ff60dfd2618+11176]
	BaseThreadInitThunk [0x0x7ffaab6ee8d7+23]
	RtlUserThreadStart [0x0x7ffaac708d9c+44]


In [10]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import time

# Initialize Selenium WebDriver
driver = webdriver.Chrome()  # Make sure chromedriver is in PATH

all_data = []  # To store all staff data

# Loop through multiple pages if needed (here example: pages 1-5)
for page in range(1, 5):
    url = f"https://www.bedford.k12.mi.us/about/directory?const_page={page}&"
    driver.get(url)
    time.sleep(2)  # Wait for page to load
    
    html = driver.page_source
    soup = BeautifulSoup(html, 'lxml')
    
    for card in soup.find_all("div", class_="contact-box"):
        name_tag = card.find("div", class_="name")
        name = name_tag.text.strip() if name_tag else ""
        
        title_tag = card.find("div", class_="title")
        title = title_tag.text.strip() if title_tag else ""
        
        email_tag = card.find("div", class_="email")
        email = email_tag.text.strip() if email_tag else ""
        
        phone_tag = card.find("div", class_="phone")
        phone = phone_tag.text.strip() if phone_tag else ""
        
        all_data.append({
            "Name": name,
            "Title": title,
            "Email": email,
            "Phone": phone
        })

# Create DataFrame
pd.set_option('display.max_rows', None)

df = pd.DataFrame(all_data)

from IPython.display import display
display(df)

driver.quit()
