In [3]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time, pandas as pd

# Stealthy Chrome
options = uc.ChromeOptions()
options.headless = True
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# spoof a real user-agent
options.add_argument(
    "user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/112.0.0.0 Safari/537.36"
)

driver = uc.Chrome(options=options)
wait = WebDriverWait(driver, 10)

# Search query
query = "nurse"
search_location = "United States"
url = f"https://www.indeed.com/jobs?q={query}&l={search_location}"

driver.get(url)

# if Indeed shows a cookie consent banner, click "Accept"
try:
    accept = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
    )
    accept.click()
    print("Clicked accept cookies")
    time.sleep(1)
except:
    # no banner appeared
    pass

# DEBUG: dump the full page HTML
with open("debug.html", "w", encoding="utf-8") as f:
    f.write(driver.page_source)
print("▶️ Wrote debug.html – open this in your browser and search for job_seen_beacon")


job_data = []

while len(job_data) < 30:
    print(f"Scraping {len(job_data)} jobs so far...")

    # DEBUG: save a snapshot of the rendered page
    driver.save_screenshot("debug.png")
    print("▶️ Wrote debug.png – open this to see exactly what the headless browser saw")

    job_cards = wait.until(EC.presence_of_all_elements_located(
    (By.CSS_SELECTOR, 'div.job_seen_beacon')
    ))


    for card in job_cards:
        try:
            title   = card.find_element(By.CSS_SELECTOR, "h2.jobTitle span").text
            company = card.find_element(By.CSS_SELECTOR, "span.companyName").text
            job_loc     = card.find_element(By.CSS_SELECTOR, "div.companyLocation").text
            summary = card.find_element(By.CSS_SELECTOR, "div.job-snippet").text


            # Click job link or extract href
            job_link = card.find_element(By.TAG_NAME, "a").get_attribute("href")

            # Open the job page in the same browser
            driver.execute_script("window.open(arguments[0]);", job_link)
            driver.switch_to.window(driver.window_handles[1])
            time.sleep(2)

            try:
                # Sometimes class name may vary slightly; handle both
                description_elem = driver.find_element(By.ID, 'jobDescriptionText')
                description = description_elem.text
            except:
                description = ""

            # Close job tab and return to main results
            driver.close()
            driver.switch_to.window(driver.window_handles[0])

            job_data.append({
                "title": title,
                "company": company,
                "location": job_loc,
                "summary": summary,
                "link": job_link,
                "description": description
            })

            if len(job_data) >= 30:
                break   # stop once we have 30
        except Exception as e:
            print(f"Error: {e}")
            continue

    # Click next page
    if len(job_data) < 30:
        try:
            next_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//a[@aria-label="Next"]')))
            next_button.click()
        except:
            print("No more pages.")
            break

driver.quit()


▶️ Wrote debug.html – open this in your browser and search for job_seen_beacon
Scraping 0 jobs so far...
▶️ Wrote debug.png – open this to see exactly what the headless browser saw


TimeoutException: Message: 


In [2]:
print("Total jobs scraped:", len(job_data))

Total jobs scraped: 0


In [None]:
# Save as CSV
df = pd.DataFrame(job_data[:30])
df.to_csv("nurse_jobs.csv", index=False)
print("Saved 30 job listings to nurse_jobs.csv")