In [1]:
# Prøver Playwright

In [17]:
import asyncio
from playwright.async_api import async_playwright, Page, TimeoutError
import os
import urllib.parse
import re
import nest_asyncio
nest_asyncio.apply()

# Limit the number of concurrent downloads
semaphore = asyncio.Semaphore(5)

def extract_case_number(onclick_attr):
    if onclick_attr:
        match = re.search(r"case=([^\']+)", onclick_attr)
        if match:
            return match.group(1)
    return None

async def download_pdf(button, page_number, index):
    async with semaphore:
        try:
            # Check if button is visible and enabled
            if not await button.is_visible():
                print(f"[WARN] Button {index+1} on page {page_number} is not visible. Skipping...")
                return

            if not await button.is_enabled():
                print(f"[WARN] Button {index+1} on page {page_number} is not enabled. Skipping...")
                return

            # Extract the 'case' value from the button's OnClick attribute
            onclick_attr = await button.get_attribute('OnClick')
            print(f"[DEBUG] Button {index+1} on page {page_number} OnClick attribute: {onclick_attr}")
            case_value = extract_case_number(onclick_attr)
            filename = f"{case_value}.pdf" if case_value else f"page{page_number}_index{index+1}.pdf"

            os.makedirs('pdfs', exist_ok=True)
            filepath = os.path.join('pdfs', filename)

            if os.path.exists(filepath):
                print(f"[DEBUG] File already exists: {filepath}, skipping download.")
                return

            # Attempt to click the button and handle the resulting popup
            print(f"[INFO] Clicking button {index+1} on page {page_number}")
            async with button.page.expect_event("popup", timeout=60000) as popup_info:
                await button.click()
            new_page = await popup_info.value

            # Wait for the download to start from the new page
            async with new_page.expect_download(timeout=60000) as download_info:
                pass
            download = await download_info.value
            await download.save_as(filepath)
            print(f"[INFO] Downloaded: {filepath}")

            await new_page.close()
        except TimeoutError as e:
            print(f"[ERROR] Timeout on page {page_number}, index {index+1}: {e}")
        except Exception as e:
            print(f"[ERROR] Error on page {page_number}, index {index+1}: {e}")

async def process_page(page: Page, page_number: int):
    print(f"[INFO] Processing page {page_number}...")
    try:
        buttons = page.locator('td.col_vis_saken > button')
        button_count = await buttons.count()
        print(f"[DEBUG] Found {button_count} buttons on page {page_number}")

        tasks = []
        for i in range(button_count):
            button = buttons.nth(i)

            # Log detailed button state
            print(f"[DEBUG] Button {i+1} on page {page_number} state:")
            print(f"  - Visible: {await button.is_visible()}")
            print(f"  - Enabled: {await button.is_enabled()}")
            print(f"  - Text: {await button.text_content()}")

            tasks.append(asyncio.create_task(download_pdf(button, page_number, i)))

        await asyncio.gather(*tasks, return_exceptions=True)
    except Exception as e:
        print(f"[ERROR] Error while processing page {page_number}: {e}")

async def main():
    async with async_playwright() as p:
        try:
            browser = await p.chromium.launch(
                headless=False,
                args=['--disable-blink-features=AutomationControlled', '--no-sandbox']
            )
            context = await browser.new_context(
                accept_downloads=True,
                java_script_enabled=True,
            )
            page = await context.new_page()
            await page.goto("https://reiselivsforum.no/web/dommer_og_avgjoerelser/")
            print("[INFO] Navigated to the main page.")

            page_number = 1
            target_start_page = 123

            while page_number < target_start_page:
                print(f"[DEBUG] Navigating to page {page_number + 1}...")
                next_buttons = page.locator('button:has-text("Neste side")')
                next_button_count = await next_buttons.count()
                if next_button_count > 0:
                    await next_buttons.first.click()
                    await page.wait_for_load_state('networkidle')
                    page_number += 1
                    print(f"[DEBUG] Reached page {page_number}")
                else:
                    print(f"[WARN] No more pages. Stopped at {page_number}.")
                    await browser.close()
                    return

            while True:
                print(f"[DEBUG] Starting to process page {page_number}")
                await process_page(page, page_number)

                next_buttons = page.locator('button:has-text("Neste side")')
                next_button_count = await next_buttons.count()
                if next_button_count > 0:
                    await next_buttons.first.click()
                    await page.wait_for_load_state('networkidle')
                    page_number += 1
                    print(f"[DEBUG] Moving to next page: {page_number}")
                else:
                    print("[INFO] No more pages to process.")
                    break

        except Exception as e:
            print(f"[FATAL] Error in main loop: {e}")
        finally:
            if 'browser' in locals() and not browser.is_closed():
                print("[INFO] Closing browser.")
                await browser.close()

# Run the asynchronous main function
asyncio.run(main())


[INFO] Navigated to the main page.
[DEBUG] Navigating to page 2...
[DEBUG] Reached page 2
[DEBUG] Navigating to page 3...
[DEBUG] Reached page 3
[DEBUG] Navigating to page 4...
[DEBUG] Reached page 4
[DEBUG] Navigating to page 5...
[DEBUG] Reached page 5
[DEBUG] Navigating to page 6...
[DEBUG] Reached page 6
[DEBUG] Navigating to page 7...
[DEBUG] Reached page 7
[DEBUG] Navigating to page 8...
[DEBUG] Reached page 8
[DEBUG] Navigating to page 9...
[DEBUG] Reached page 9
[DEBUG] Navigating to page 10...
[DEBUG] Reached page 10
[DEBUG] Navigating to page 11...
[DEBUG] Reached page 11
[DEBUG] Navigating to page 12...
[DEBUG] Reached page 12
[DEBUG] Navigating to page 13...
[DEBUG] Reached page 13
[DEBUG] Navigating to page 14...
[DEBUG] Reached page 14
[DEBUG] Navigating to page 15...
[DEBUG] Reached page 15
[DEBUG] Navigating to page 16...
[DEBUG] Reached page 16
[DEBUG] Navigating to page 17...
[DEBUG] Reached page 17
[DEBUG] Navigating to page 18...
[DEBUG] Reached page 18
[DEBUG] Navi

AttributeError: 'Browser' object has no attribute 'is_closed'