# In [2]:  (Jupyter notebook cell marker)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
import time
import csv

# Browser configuration.
# A random user-agent string is drawn once from fake_useragent and passed to
# Chrome together with the remaining command-line switches. The switches are
# applied in the order listed below.
ua = UserAgent()
chrome_options = Options()
for flag in (
    "--headless",
    f"user-agent={ua.random}",
    # presumably intended to make the automated browser less detectable
    "--disable-blink-features=AutomationControlled",
    "--no-sandbox",
    "--disable-dev-shm-usage",
):
    chrome_options.add_argument(flag)

# Start the Chrome session that the scraping loop drives.
driver = webdriver.Chrome(options=chrome_options)

# Scrape the paginated therapist listing into therapists.csv, one row per
# profile card (columns: Name, Profile URL, Phone, Bio).
#
# Pages are requested as ?page=1, ?page=2, ... and the loop stops at the first
# page that yields no 'div.card.profile-card' elements.
#
# The whole run sits inside try/finally so the Chrome session is shut down
# even when a page load raises or the run is interrupted (Ctrl-C / kernel
# interrupt). The `with` block closes the CSV on the way out, so rows written
# before the failure are kept.
try:
    with open('therapists.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Name', 'Profile URL', 'Phone', 'Bio'])

        page = 1
        while True:
            url = f'https://www.psychologytoday.com/us/therapists?page={page}'
            print(f"[INFO] -> Loading page {page}: {url}")
            driver.get(url)
            time.sleep(4)  # Wait for JavaScript to load

            soup = BeautifulSoup(driver.page_source, 'html.parser')
            cards = soup.select('div.card.profile-card')

            if not cards:
                print("[INFO] No more profiles — stopping.")
                break

            print(f"[INFO] Found {len(cards)} profiles on page {page}")
            for card in cards:
                # Best effort per card: one malformed card must not abort the
                # whole crawl, so failures are reported and the card skipped.
                try:
                    name_tag = card.select_one('.profile-name')
                    link_tag = card.select_one('a.profile-link')
                    phone_tag = card.select_one('.profile-phone')
                    bio_tag = card.select_one('.profile-description')

                    name = name_tag.get_text(strip=True) if name_tag else 'N/A'
                    # .get() instead of ['href']: an anchor without an href
                    # should yield 'N/A' for the link, not discard the card.
                    href = link_tag.get('href', '') if link_tag else ''
                    if not href:
                        link = 'N/A'
                    elif href.startswith(('http://', 'https://')):
                        # Already absolute; prefixing the site root would
                        # produce a malformed URL.
                        link = href
                    else:
                        link = 'https://www.psychologytoday.com' + href
                    phone = phone_tag.get_text(strip=True) if phone_tag else 'N/A'
                    bio = bio_tag.get_text(strip=True) if bio_tag else 'N/A'

                    writer.writerow([name, link, phone, bio])
                    print(f"  • {name}")
                except Exception as e:
                    print(f"[WARN] Failed to parse a profile card: {e}")

            page += 1
            time.sleep(2)  # pause between page requests
finally:
    # Clean up: always release the browser, on success and on failure alike.
    driver.quit()

print("[DONE] therapists.csv created.")


# Notebook output: KeyboardInterrupt (the recorded run was interrupted before it finished)