In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import subprocess  # To run AppleScript
import time
import pandas as pd

# Path to ChromeDriver
driver_path = "/opt/homebrew/bin/chromedriver"
service = Service(driver_path)
driver = webdriver.Chrome(service=service)

# Open the website
driver.get("https://recruiterdb.web.app/")

# Wait for user to log in manually
input("Log in manually, then press Enter here to continue...")

# Allow time for page to load
time.sleep(5)

# List to store extracted emails
data = []

# Extract emails for all recruiters
while True:
    recruiters = driver.find_elements(By.XPATH, "//table/tbody/tr")
    
    if not recruiters:
        print("No recruiters found.")
        break

    for recruiter in recruiters:
        try:
            name = recruiter.find_element(By.XPATH, "./td[2]").text  # Extract recruiter name
            email_button = recruiter.find_element(By.XPATH, ".//td[5]//button")  # Email icon button
            
            # Click email button to open Mail app
            email_button.click()
            time.sleep(5)  # Allow Mail to open

            # **AppleScript to Copy Email & Force Close Window**
            applescript = '''
            tell application "Mail"
                activate
                delay 2
                tell application "System Events"
                    tell process "Mail"
                        -- Shift focus from Subject to "To" field
                        keystroke tab using shift down
                        keystroke tab using shift down
                        delay 1
                        -- Select and copy email
                        keystroke "a" using {command down}
                        keystroke "c" using {command down}
                        delay 1
                        -- Close the Mail window
                        keystroke "w" using {command down}
                        delay 2
                        -- Force press "Don't Save" (Cmd + D)
                        key code 2 using {command down}
                        delay 1
                    end tell
                end tell
                return the clipboard
            end tell
            '''

            result = subprocess.run(["osascript", "-e", applescript], capture_output=True, text=True)
            email = result.stdout.strip() if result.stdout else "N/A"

            # Store data
            data.append([name, email])

            print(f"✅ Extracted Name: {name}, Email: {email}")

            time.sleep(2)  # Give time before moving to next recruiter

        except Exception as e:
            print(f"❌ Error extracting email for {name}: {e}")
            continue

    # Try to click "Load More" if available, otherwise stop
    try:
        load_more = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(),'Load More')]"))
        )
        load_more.click()
        time.sleep(3)  # Allow new recruiters to load
    except:
        print("✅ All recruiters processed.")
        break

# Save data to Excel
df = pd.DataFrame(data, columns=["Name", "Email"])
df.to_excel("recruiters.xlsx", index=False)

# Close browser
driver.quit()
print("🚀 Email extraction complete! Data saved to 'recruiters.xlsx'.")


✅ Extracted Name: Christie Kuhn, Email: rchrkuhn@amazon.jobs
✅ Extracted Name: Tara Hopkins, Email: thopkins@ups.com
✅ Extracted Name: Barbara Rivera, Email: barbara.rivera@jpmchase.com
✅ Extracted Name: Violet Rylo, Email: violet.rylo@mbww.com
✅ Extracted Name: Olga Bartolomeo, Email: Olga.Bartolomeo@citi.com
✅ Extracted Name: Christie Kuhn, Email: rchrkuhn@amazon.jobs
✅ Extracted Name: Tara Hopkins, Email: thopkins@ups.com
❌ Error extracting email for Tara Hopkins: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: not connected to DevTools
  (Session info: chrome=133.0.6943.99)
Stacktrace:
0   chromedriver                        0x0000000102b26bac cxxbridge1$str$ptr + 2724820
1   chromedriver                        0x0000000102b1f20c cxxbridge1$str$ptr + 2693684
2   chromedriver                        0x0000000102685afc cxxbridge1$string$len + 93348
3   chromedriver                        0x000000010266ea50 chromedriver + 191056


In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import subprocess  # To run AppleScript
import time
import pandas as pd

# Path to ChromeDriver
driver_path = "/opt/homebrew/bin/chromedriver"
service = Service(driver_path)
driver = webdriver.Chrome(service=service)

# Open the website
driver.get("https://recruiterdb.web.app/")

# Wait for user to log in manually
input("Log in manually, then press Enter here to continue...")

# Allow time for page to load
time.sleep(5)

# List to store extracted emails
data = []
scraped_names = set()  # To track already scraped recruiters

# Extract emails for all recruiters
while True:
    recruiters = driver.find_elements(By.XPATH, "//table/tbody/tr")

    if not recruiters:
        print("No recruiters found.")
        break

    for i in range(len(recruiters)):  # Use index-based iteration to avoid stale elements
        try:
            recruiters = driver.find_elements(By.XPATH, "//table/tbody/tr")  # Re-fetch updated recruiters
            recruiter = recruiters[i]
            
            name = recruiter.find_element(By.XPATH, "./td[2]").text  # Extract recruiter name

            # Skip if already scraped
            if name in scraped_names:
                continue

            email_button = recruiter.find_element(By.XPATH, ".//td[5]//button")  # Email icon button
            
            # Click email button to open Mail app
            email_button.click()
            time.sleep(5)  # Allow Mail to open

            # **AppleScript to Copy Email & Force Close Window**
            applescript = '''
            tell application "Mail"
                activate
                delay 2
                tell application "System Events"
                    tell process "Mail"
                        -- Shift focus from Subject to "To" field
                        keystroke tab using shift down
                        keystroke tab using shift down
                        delay 1
                        -- Select and copy email
                        keystroke "a" using {command down}
                        keystroke "c" using {command down}
                        delay 1
                        -- Close the Mail window
                        keystroke "w" using {command down}
                        delay 2
                        -- Force press "Don't Save" (Cmd + D)
                        key code 2 using {command down}
                        delay 1
                    end tell
                end tell
                return the clipboard
            end tell
            '''

            result = subprocess.run(["osascript", "-e", applescript], capture_output=True, text=True)
            email = result.stdout.strip() if result.stdout else "N/A"

            # Store data
            data.append([name, email])
            scraped_names.add(name)  # Mark this recruiter as scraped

            print(f"✅ Extracted Name: {name}, Email: {email}")

            time.sleep(2)  # Give time before moving to next recruiter

        except Exception as e:
            print(f"❌ Error extracting email for {name}: {e}")
            continue

    # Try to click "Load More" if available, otherwise stop
    try:
        load_more = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(),'Load More')]"))
        )
        load_more.click()
        time.sleep(3)  # Allow new recruiters to load
    except:
        print("✅ All recruiters processed.")
        break

# Save data to Excel
df = pd.DataFrame(data, columns=["Name", "Email"])
df.to_excel("recruiters.xlsx", index=False)

# Close browser
driver.quit()
print("🚀 Email extraction complete! Data saved to 'recruiters.xlsx'.")


✅ Extracted Name: Christie Kuhn, Email: rchrkuhn@amazon.jobs
✅ Extracted Name: Tara Hopkins, Email: thopkins@ups.com
✅ Extracted Name: Barbara Rivera, Email: barbara.rivera@jpmchase.com
✅ Extracted Name: Violet Rylo, Email: violet.rylo@mbww.com
✅ Extracted Name: Olga Bartolomeo, Email: Olga.Bartolomeo@citi.com
✅ Extracted Name: Lindsay Ahrens, Email: lindsay.ahrens@edwardjones.com
✅ Extracted Name: Sarah Bajohr, Email: Sarah.Bajohr@bnymellon.com
✅ Extracted Name: Danielle Mendez, Email: Danielle.mendez@ny.frb.org
✅ Extracted Name: Mike Hallahan, Email: mike.hallahan@materialplus.io
✅ Extracted Name: Bobbie Lachani, Email: bobbie.lachani@alticeusa.com
✅ Extracted Name: Megan Huang, Email: mhuang@actcommodities.com
✅ Extracted Name: Ali Perez, Email: aperez@actcommodities.com
✅ Extracted Name: Annie Henry, Email: ahenry@wallstreetprep.com
✅ Extracted Name: Lauren Khan, Email: Lkhan@merceradvisors.com
✅ Extracted Name: Bridget Fornaro, Email: Bridget.Fornaro@nbcuni.com
✅ Extracted Name: L