In [44]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import re, time, pandas as pd
import undetected_chromedriver as uc

In [None]:

def get_dentists_selenium(zipcode):
    """Load the ADA "Find a Dentist" results for ``zipcode`` and preview the cards.

    Exploratory helper: opens the search results page in an
    undetected-chromedriver Chrome session, counts the
    ``div.profile-card-wrapper`` elements in the rendered HTML and prints the
    first 300 characters of text from the first three cards.

    Args:
        zipcode: ZIP code string interpolated into the ``address`` query
            parameter of the search URL.

    Returns:
        None. Everything is reported via ``print``.
    """
    options = uc.ChromeOptions()
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-blink-features=AutomationControlled")
    driver = uc.Chrome(options=options)

    try:
        driver.get(f"https://findadentist.ada.org/dentists?address={zipcode}")
        # Fixed pause before reading the DOM -- presumably to let the results
        # render client-side; TODO confirm 7s is enough on slow connections.
        time.sleep(7)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Always shut the browser down, even if navigation or parsing raised,
        # so repeated runs of this cell do not pile up Chrome processes.
        driver.quit()

    cards = soup.select('div.profile-card-wrapper')
    print(f"Found {len(cards)} dentist cards")

    for card in cards[:3]:  # just peek at a few
        print(card.get_text(strip=True)[:300])
# Quick manual check of the card preview against a single ZIP code.
get_dentists_selenium("02453")

Found 100 dentist cards
Dr. Damian D MeolaGeneral Practice131 Main St Apt 1 Waltham, MA 02453-6636Details
Dr. Robert J LindbergGeneral Practice32 South St Ste 202 Waltham, MA 02453-3555Details
Dr. Tacuhi Sona TacvorianGeneral Practice29 Spruce St Unit 1 Waltham, MA 02453-4306Armenian, Romanian, SpanishDetails


In [26]:
def get_dentists_selenium(zipcode):
    """Fetch the results page with plain headless Selenium and dump its HTML head.

    Diagnostic helper: loads the "Find a Dentist" search URL for ``zipcode``
    in a stock headless Chrome session and prints the first 1000 characters
    of the prettified page source so the served markup can be inspected.

    Args:
        zipcode: ZIP code string interpolated into the ``address`` query
            parameter of the search URL.

    Returns:
        None. The HTML excerpt is printed.
    """
    options = Options()
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)

    try:
        driver.get(f"https://findadentist.ada.org/dentists?address={zipcode}")
        time.sleep(5)  # let JS load the results
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Release the headless browser regardless of success; previously the
        # session was never closed and leaked one Chrome process per call.
        driver.quit()

    print(soup.prettify()[:1000])
    
# Probe one ZIP; the captured output below is a "Just a moment..." page, not search results.
get_dentists_selenium("02108")

<html dir="ltr" lang="en-US">
 <head>
  <title>
   Just a moment...
  </title>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="IE=Edge" http-equiv="X-UA-Compatible"/>
  <meta content="noindex,nofollow" name="robots"/>
  <meta content="width=device-width,initial-scale=1" name="viewport"/>
  <style>
   *{box-sizing:border-box;margin:0;padding:0}html{line-height:1.15;-webkit-text-size-adjust:100%;color:#313131;font-family:system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji}body{display:flex;flex-direction:column;height:100vh;min-height:100vh}.main-content{margin:8rem auto;max-width:60rem;padding-left:1.5rem}@media (width <= 720px){.main-content{margin-top:4rem}}.h2{font-size:1.5rem;font-weight:500;line-height:2.25rem}@media (width <= 720px){.h2{font-size:1.25rem;line-height:1.5rem}}#challenge-error-text{background-image:url(data:image/s

In [46]:
from urllib.parse import urljoin

def extract_phone_from_detail(driver, partial_href):
    """Navigate ``driver`` to a dentist detail page and return its phone text.

    Args:
        driver: An already-open WebDriver session; it is navigated away from
            whatever page it was showing.
        partial_href: Link to the detail page, resolved against
            ``https://findadentist.ada.org`` with ``urljoin``.

    Returns:
        The stripped text of the first ``p.t1`` element on the page, or an
        empty string when no such element is found.
    """
    driver.get(urljoin("https://findadentist.ada.org", partial_href))
    # Fixed pause before snapshotting the page source.
    time.sleep(5)

    detail_soup = BeautifulSoup(driver.page_source, 'html.parser')
    phone_node = detail_soup.select_one('p.t1')
    if not phone_node:
        return ""
    return phone_node.get_text(strip=True)


In [47]:
def extract_emails_phones(text):
    """Find e-mail addresses and 10-digit phone numbers in free text.

    Args:
        text: The string to scan.

    Returns:
        A ``(emails, phones)`` tuple of lists, each in order of appearance.
        Both lists are empty when nothing matches (including for ``""``).

    Notes:
        * The domain part is matched as dot-separated labels, so sentence
          punctuation directly after an address ("mail a@b.com.") is not
          captured as part of the address.
        * Phone matches must not be adjacent to other digits, so a
          10-digit slice of a longer number is not reported as a phone.
    """
    # One or more ".label" groups; a bare trailing "." cannot match because
    # each dot must be followed by at least one label character.
    email_pattern = r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+"
    # (?<!\d) / (?!\d) pin the match to digit boundaries.
    phone_pattern = r"(?<!\d)\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)"

    emails = re.findall(email_pattern, text)
    phones = re.findall(phone_pattern, text)
    return emails, phones

def get_dentists_selenium(zipcode):
    """Scrape dentist listings for one ZIP code from the ADA "Find a Dentist" site.

    Loads the search results page, parses every ``div.profile-card-wrapper``
    card for name and address, then visits each card's detail link (via
    ``extract_phone_from_detail``) to read the phone number.

    Args:
        zipcode: ZIP code string interpolated into the ``address`` query
            parameter of the search URL and copied into each record.

    Returns:
        A list of dicts with keys ``'Name'``, ``'Address'``, ``'Phone'``,
        ``'Detail URL'`` (the href exactly as found on the card, or ``None``)
        and ``'ZIP'``. Cards without a details container are skipped.

    Note:
        The browser is closed in a ``finally`` block, so an exception or a
        ``KeyboardInterrupt`` part-way through no longer leaves a Chrome
        process running.
    """
    options = uc.ChromeOptions()
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-blink-features=AutomationControlled")
    driver = uc.Chrome(options=options)

    try:
        driver.get(f"https://findadentist.ada.org/dentists?address={zipcode}")
        # Fixed pause before reading the DOM -- presumably to let the results
        # render client-side; TODO confirm.
        time.sleep(7)

        # The soup is built from a string snapshot of the page, so navigating
        # the driver to detail pages below does not invalidate `cards`.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        cards = soup.select('div.profile-card-wrapper')
        print(f"Found {len(cards)} dentist cards")

        results = []

        for card in cards:
            details = card.select_one('div.profile-card-wrapper__details')
            if not details:
                continue

            name_tag = details.select_one('h2.h3 a')
            name = name_tag.get_text(strip=True) if name_tag else "N/A"

            address_tag = details.select_one('div.m-t-6')
            address = address_tag.get_text(strip=True) if address_tag else ""

            # Link to the dentist's profile page (may be absent).
            detail_link_tag = card.select_one('a.button-main--secondary')
            href = detail_link_tag['href'] if detail_link_tag and 'href' in detail_link_tag.attrs else None

            # Visit the detail page for the phone number; each visit sleeps
            # inside the helper, so this dominates the run time per ZIP.
            phone = extract_phone_from_detail(driver, href) if href else ""

            results.append({
                'Name': name,
                'Address': address,
                'Phone': phone,
                'Detail URL': href,
                'ZIP': zipcode
            })

        print(f"Extracted {len(results)} dentists for ZIP {zipcode}")
        return results
    finally:
        # Runs on success, on any exception, and on KeyboardInterrupt.
        driver.quit()

In [48]:
# Scrape every ZIP of interest, pooling the records, then write one CSV.
zip_codes = ['02108', '02115', '02210', '01890', '01720', '02453']  # Add more ZIPs here
all_dentists = []

for zip_code in zip_codes:
    print(f"Scraping ZIP: {zip_code}")
    try:
        all_dentists += get_dentists_selenium(zip_code)
    except Exception as err:
        # Best effort: report the failing ZIP and continue with the next one.
        print(f"Error on {zip_code}: {err}")
    # Short pause between ZIP codes.
    time.sleep(2)

df = pd.DataFrame(all_dentists)
df.to_csv("massachusetts_dentists.csv", index=False)


Scraping ZIP: 02108
Found 100 dentist cards


KeyboardInterrupt: 