In [15]:
from playwright.sync_api import sync_playwright
import pandas as pd
import time

def scrape_nurse_jobs():
    with sync_playwright() as pw:
        # 1) launch a visible browser (so Cloudflare sees “real” Chrome)
        browser = pw.chromium.launch(headless=False)
        ctx     = browser.new_context(user_agent=(
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/112.0.0.0 Safari/537.36"
        ))
        page    = ctx.new_page()

        # 2) go to the search page and accept cookies
        url = "https://www.indeed.com/jobs?q=nurse&l=United+States"
        page.goto(url, timeout=30000)
        try:
            page.click("#onetrust-accept-btn-handler", timeout=5000)
        except:
            pass

        # 3) infinite-scroll a few times
        for _ in range(4):
            page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
            time.sleep(2)

        # 4) grab all the job cards
        cards = page.locator("div.job_seen_beacon")
        count = cards.count()
        print(f"👍 Loaded {count} cards on the page")

        # 5) extract previews + full description
        jobs = []
        for i in range(min(count, 30)):
            card = cards.nth(i)
            link = card.locator("a").get_attribute("href")
            title   = card.locator("h2.jobTitle span").inner_text()
            company = card.locator("span.companyName").inner_text()
            loc     = card.locator("div.companyLocation").inner_text()
            summary = card.locator("div.job-snippet").inner_text()

            # open detail in a new tab
            detail = ctx.new_page()
            detail.goto(link, timeout=30000)
            time.sleep(1)
            try:
                desc = detail.locator("#jobDescriptionText").inner_text()
            except:
                desc = ""
            detail.close()

            jobs.append({
                "title":       title,
                "company":     company,
                "location":    loc,
                "summary":     summary,
                "link":        link,
                "description": desc
            })

        browser.close()
        return jobs

if __name__ == "__main__":
    data = scrape_nurse_jobs()
    df   = pd.DataFrame(data)
    df.to_csv("nurse_jobs.csv", index=False)
    print("✅ Done — scraped", len(df), "jobs.")
    print(df.head())


Error: It looks like you are using Playwright Sync API inside the asyncio loop.
Please use the Async API instead.