From b0c9a1780431be72a7f98db1aa69cc15d8ea01b9 Mon Sep 17 00:00:00 2001 From: Thalison Fernandes Date: Fri, 3 Oct 2025 15:28:45 -0300 Subject: [PATCH 1/3] fix: map exception when try to take screenshot of an iframe --- .github/workflows/tests.yml | 4 + .gitignore | 5 +- CHANGELOG.md | 35 + LICENSE | 2 +- README.md | 39 + README_zh.md | 41 +- cz.yaml | 2 +- public/docs/features.md | 91 ++- public/docs/zh/features.md | 469 ++++++++++- public/index.html | 30 +- pydoll/browser/chromium/base.py | 24 +- pydoll/browser/tab.py | 13 +- pydoll/constants.py | 47 ++ pydoll/elements/mixins/find_elements_mixin.py | 12 +- pydoll/elements/web_element.py | 92 +++ pydoll/exceptions.py | 6 + pyproject.toml | 2 +- tests/pages/test_children.html | 28 + tests/test_browser/test_browser_base.py | 78 +- tests/test_browser/test_browser_chrome.py | 6 +- tests/test_browser/test_browser_edge.py | 6 +- tests/test_browser/test_browser_tab.py | 49 +- .../test_tab_request_integration.py | 18 +- tests/test_browser/test_tab_singleton.py | 180 ----- tests/test_find_elements_mixin.py | 10 + tests/test_web_element.py | 730 ++++++++++++------ 26 files changed, 1455 insertions(+), 564 deletions(-) create mode 100644 tests/pages/test_children.html delete mode 100644 tests/test_browser/test_tab_singleton.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 58c1b5bd..40c2ce03 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -22,6 +22,10 @@ jobs: run: | python -m pip install poetry poetry install + - name: Install Chrome + uses: browser-actions/setup-chrome@v1 + with: + chrome-version: 132 - name: Run tests with coverage run: | poetry run pytest -s -x --cov=pydoll -vv --cov-report=xml diff --git a/.gitignore b/.gitignore index 8e8be591..623d7c2d 100644 --- a/.gitignore +++ b/.gitignore @@ -161,4 +161,7 @@ cython_debug/ #.idea/ .czrc -.ruff_cache/ \ No newline at end of file +.ruff_cache/ + +# Dev test file +dev_test_file.py diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 1abdad64..c3f7c406 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,38 @@ +## 2.8.1 (2025-09-27) + +### Fix + +- store the opened tab in the _tabs_opened dictionary +- **elements**: correctly detect parenthesized XPath expressions + +### Refactor + +- simplify FindElementsMixin._get_expression_type startswith checks into single tuple + +## 2.8.0 (2025-08-28) + +### Feat + +- adding get_siblings_elements method +- adding get_children_elements method +- refactor Tab class to support optional WebSocket address handling +- add WebSocket connection support for existing browser instances +- add optional WebSocket address support in connection handler + +### Fix + +- add a raise_exc option to the get siblings and get children methods +- improving children and parent retrieval docstring and creating a private generic method for them +- using new execute_script public method +- solving conflicts +- rename pages fixtures files and adding an error test + +### Refactor + +- refactor Tab class to improve initialization and error handling +- refactor Browser class to manage opened tabs and WebSocket setup +- add new exception classes for connection and WebSocket errors + ## 2.7.0 (2025-08-22) ### Feat diff --git a/LICENSE b/LICENSE index f15d16df..2b5fb3d7 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright © 2025 +Copyright © 2025 AutoscrapeLabs Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: diff --git a/README.md b/README.md index 38c45afd..86518f82 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,45 @@ We believe that powerful automation 
shouldn't require you to become an expert in ## What's New +### Remote connections via WebSocket — control any Chrome from anywhere! + +You asked for it, we delivered. You can now connect to an already running browser remotely via its WebSocket address and use the full Pydoll API immediately. + +```python +from pydoll.browser.chromium import Chrome + +chrome = Chrome() +tab = await chrome.connect('ws://YOUR_HOST:9222/devtools/browser/XXXX') + +# Full power unlocked: navigation, element automation, requests, events… +await tab.go_to('https://example.com') +title = await tab.execute_script('return document.title') +print(title) +``` + +This makes it effortless to run Pydoll against remote/CI browsers, containers, or shared debugging targets — no local launch required. Just point to the WS endpoint and automate. + +### Navigate the DOM like a pro: get_children_elements() and get_siblings_elements() + +Two delightful helpers to traverse complex layouts with intention: + +```python +# Grab direct children of a container +container = await tab.find(id='cards') +cards = await container.get_children_elements(max_depth=1) + +# Want to go deeper? This will return children of children (and so on) +elements = await container.get_children_elements(max_depth=2) + +# Walk horizontal lists without re-querying the DOM +active = await tab.find(class_name='item-active') +siblings = await active.get_siblings_elements() + +print(len(cards), len(siblings)) +``` + +Use them to cut boilerplate, express intent, and keep your scraping/automation logic clean and readable — especially in dynamic grids, lists and menus. + ### WebElement: state waiting and new public APIs - New `wait_until(...)` on `WebElement` to await element states with minimal code: diff --git a/README_zh.md b/README_zh.md index 5931b683..92ff8ec4 100644 --- a/README_zh.md +++ b/README_zh.md @@ -49,6 +49,45 @@ Pydoll 采用全新设计理念,从零构建,直接对接 Chrome DevTools Pr ## 最新功能 +### 通过 WebSocket 进行远程连接 —— 随时随地控制浏览器! 
+ +现在你可以使用浏览器的 WebSocket 地址直接连接到已运行的实例,并立即使用完整的 Pydoll API: + +```python +from pydoll.browser.chromium import Chrome + +chrome = Chrome() +tab = await chrome.connect('ws://YOUR_HOST:9222/devtools/browser/XXXX') + +# 直接开干:导航、元素自动化、请求、事件… +await tab.go_to('https://example.com') +title = await tab.execute_script('return document.title') +print(title) +``` + +这让你可以轻松对接远程/CI 浏览器、容器或共享调试目标——无需本地启动,只需指向 WS 端点即可自动化。 + +### 像专业人士一样漫游 DOM:get_children_elements() 与 get_siblings_elements() + +两个让复杂布局遍历更优雅的小助手: + +```python +# 获取容器的直接子元素 +container = await tab.find(id='cards') +cards = await container.get_children_elements(max_depth=1) + +# 想更深入?这将返回子元素的子元素(以此类推) +elements = await container.get_children_elements(max_depth=2) + +# 在横向列表中无痛遍历兄弟元素 +active = await tab.find(class_name='item--active') +siblings = await active.get_siblings_elements() + +print(len(cards), len(siblings)) +``` + +用更少样板代码表达更多意图,特别适合动态网格、列表与菜单的场景,让抓取/自动化逻辑更清晰、更可读。 + ### WebElement:状态等待与新的公共 API - 新增 `wait_until(...)` 用于等待元素状态,使用更简单: @@ -212,7 +251,7 @@ options.browser_preferences = { 这种控制级别以前只有 Chrome 扩展开发者才能使用 - 现在它在你的自动化工具包中! -查看[文档](https://autoscrape-labs.github.io/pydoll/features/custom-browser-preferences/)了解更多详情。 +查看[文档](https://pydoll.tech/docs/zh/features/#custom-browser-preferences/)了解更多详情。 ### 新的 `get_parent_element()` 方法 检索任何 WebElement 的父元素,使导航 DOM 结构更加容易: diff --git a/cz.yaml b/cz.yaml index 780b4298..900a5e76 100644 --- a/cz.yaml +++ b/cz.yaml @@ -2,4 +2,4 @@ commitizen: name: cz_conventional_commits tag_format: $version - version: 2.7.0 + version: 2.8.1 diff --git a/public/docs/features.md b/public/docs/features.md index 048e35b0..0e6497b2 100644 --- a/public/docs/features.md +++ b/public/docs/features.md @@ -57,6 +57,53 @@ Capture visual content from web pages: - **High-Quality PDF Export**: Generate PDF documents from web pages - **Custom Formatting**: Coming soon! 
+## Remote Connections and Hybrid Automation + +### Connect to a running browser via WebSocket + +Control an already running browser remotely by pointing Pydoll to its DevTools WebSocket address. + +```python +import asyncio +from pydoll.browser.chromium import Chrome + +async def main(): + chrome = Chrome() + tab = await chrome.connect('ws://YOUR_HOST:9222/devtools/browser/XXXX') + + await tab.go_to('https://example.com') + title = await tab.execute_script('return document.title') + print(title) + +asyncio.run(main()) +``` + +Perfect for CI, containers, remote hosts, or shared debugging targets—no local launch required. Just provide the WS endpoint and automate. + +### Bring your own CDP: wrap existing sessions with Pydoll objects + +If you already have your own CDP integration, you can still leverage Pydoll’s high-level API by wiring it to an existing DevTools session. As long as you know an element’s `objectId`, you can create a `WebElement` directly: + +```python +from pydoll.connection import ConnectionHandler +from pydoll.elements.web_element import WebElement + +# Your DevTools WebSocket endpoint and an element objectId you resolved via CDP +ws = 'ws://YOUR_HOST:9222/devtools/page/ABCDEF...' +object_id = 'REMOTE_ELEMENT_OBJECT_ID' + +connection_handler = ConnectionHandler(ws_address=ws) +element = WebElement(object_id=object_id, connection_handler=connection_handler) + +# Use the full WebElement API immediately +visible = await element.is_visible() +await element.wait_until(is_interactable=True, timeout=10) +await element.click() +text = await element.text +``` + +This hybrid approach lets you blend your low-level CDP tooling (for discovery, instrumentation, or custom flows) with Pydoll’s ergonomic element API. + ## Intuitive Element Finding Pydoll v2.0+ introduces a revolutionary approach to finding elements that's both more intuitive and more powerful than traditional selector-based methods. 
@@ -137,6 +184,44 @@ async def query_examples(): asyncio.run(query_examples()) ``` +### DOM Traversal Helpers: get_children_elements() and get_siblings_elements() + +These helpers let you traverse the DOM tree from a known anchor, preserving scope and intent. + +- get_children_elements(max_depth: int = 1, tag_filter: list[str] | None = None, raise_exc: bool = False) -> list[WebElement] + - Returns descendants up to max_depth using pre-order traversal (direct children first, then their descendants) + - max_depth=1 returns only direct children; 2 includes grandchildren, and so on + - tag_filter restricts results to specific tags (use lowercase names, e.g. ['a', 'li']) + - raise_exc=True raises ElementNotFound if the underlying script fails to resolve + +- get_siblings_elements(tag_filter: list[str] | None = None, raise_exc: bool = False) -> list[WebElement] + - Returns elements sharing the same parent, excluding the current element + - tag_filter narrows by tag; order follows the parent’s child order + +```python +# Direct children in document order +container = await tab.find(id='cards') +children = await container.get_children_elements(max_depth=1) + +# Include grandchildren +descendants = await container.get_children_elements(max_depth=2) + +# Filter by tag +links = await container.get_children_elements(max_depth=4, tag_filter=['a']) + +# Horizontal traversal +active = await tab.find(class_name='item-active') +siblings = await active.get_siblings_elements() +link_siblings = await active.get_siblings_elements(tag_filter=['a']) +``` + +Performance and correctness notes: + +- DOM is a tree: breadth expands quickly with depth. Prefer small max_depth values and apply tag_filter to minimize work. +- Ordering: children follow document order; siblings follow the parent’s order for stable iteration. +- iFrames: each iframe has its own tree. Use `tab.get_frame(iframe_element)` to traverse inside the frame, then call these helpers there. 
+- Large documents: deep traversals can touch many nodes. Combine shallow traversal with targeted `find()`/`query()` on subtree anchors for best performance. + ## Native Cloudflare Captcha Bypass !!! warning "Important Information About Captcha Bypass" @@ -440,12 +525,8 @@ async def main(): async with Chrome() as browser: # Start the browser once await browser.start() - - # Create partial function with browser parameter - scrape_with_browser = partial(scrape_page, browser) - # Process all URLs concurrently using the same browser - results = await asyncio.gather(*(scrape_with_browser(url) for url in urls)) + results = await asyncio.gather(*(scrape_page(browser, url) for url in urls)) # Print results for result in results: diff --git a/public/docs/zh/features.md b/public/docs/zh/features.md index f7d67a5a..0d2d6f59 100644 --- a/public/docs/zh/features.md +++ b/public/docs/zh/features.md @@ -1,4 +1,4 @@ -__# 核心特性 +# 核心特性 Pydoll为浏览器自动化带来了突破性的功能,比传统浏览器自动化工具更加强大更易于使用。 @@ -35,7 +35,7 @@ Pydoll 基于 Python 的 asyncio 全新构建,提供以下功能: 实时响应浏览器事件: -- **Network Monitoring**: Track requests, responses, and failed loads +- **网络监控**:跟踪请求、响应和失败的加载 - **DOM 结构观测**: 响应页面结构的变化 - **页面生命周期事件**: 捕获导航、加载和渲染事件 - **自定义事件处理程序**: 为感兴趣的特定事件注册回调 @@ -59,6 +59,53 @@ Pydoll支持操作任何Chromium核心的浏览器: - **高质量 PDF 导出**:从网页生成 PDF 文档 - **自定义格式**:即将推出! 
+## 远程连接与混合自动化 + +### 通过 WebSocket 连接已运行的浏览器 + +只需提供 DevTools 的 WebSocket 地址,即可远程控制已经在运行的浏览器实例: + +```python +import asyncio +from pydoll.browser.chromium import Chrome + +async def main(): + chrome = Chrome() + tab = await chrome.connect('ws://YOUR_HOST:9222/devtools/browser/XXXX') + + await tab.go_to('https://example.com') + title = await tab.execute_script('return document.title') + print(title) + +asyncio.run(main()) +``` + +非常适合 CI、容器、远程主机或共享调试目标——无需本地启动,只需指向 WS 端点即可自动化。 + +### 自带 CDP:用 Pydoll 封装已有会话 + +如果你已经有自己的 CDP 集成,也可以将其与 Pydoll 的高级 API 结合使用。只要你知道元素的 `objectId`,就能直接构造 `WebElement`: + +```python +from pydoll.connection import ConnectionHandler +from pydoll.elements.web_element import WebElement + +# 你的 DevTools WebSocket 地址,以及通过 CDP 获取到的元素 objectId +ws = 'ws://YOUR_HOST:9222/devtools/page/ABCDEF...' +object_id = 'REMOTE_ELEMENT_OBJECT_ID' + +connection_handler = ConnectionHandler(ws_address=ws) +element = WebElement(object_id=object_id, connection_handler=connection_handler) + +# 立刻使用完整的 WebElement API +visible = await element.is_visible() +await element.wait_until(is_interactable=True, timeout=10) +await element.click() +text = await element.text +``` + +这种混合模式让你可以将底层的 CDP 能力(用于发现、注入或自定义流程)与 Pydoll 更易用的元素 API 顺畅结合。 + ## 直观的元素查找 Pydoll v2.0+ 引入了一种革命性的元素查找方法,比传统的基于选择器的方法更直观、更强大。 @@ -140,6 +187,44 @@ async def query_examples(): asyncio.run(query_examples()) ``` +### DOM 遍历助手:get_children_elements() 与 get_siblings_elements() + +从已知锚点按树形结构遍历 DOM,更加明确且安全: + +- get_children_elements(max_depth: int = 1, tag_filter: list[str] | None = None, raise_exc: bool = False) -> list[WebElement] + - 使用先序遍历返回后代元素(先直接子元素,再其后代),深度不超过 max_depth + - max_depth=1 仅返回直接子元素;2 包含孙辈元素,以此类推 + - tag_filter 用于按标签名过滤(小写,如 ['a', 'li']) + - 当 raise_exc=True 且脚本解析失败时会抛出 ElementNotFound + +- get_siblings_elements(tag_filter: list[str] | None = None, raise_exc: bool = False) -> list[WebElement] + - 返回与当前元素同一父节点下的兄弟元素(不包含当前元素) + - tag_filter 可按标签名过滤;返回顺序与父节点的子元素顺序一致 + +```python +# 文档顺序的直接子元素 
+container = await tab.find(id='cards') +children = await container.get_children_elements(max_depth=1) + +# 包含孙辈 +descendants = await container.get_children_elements(max_depth=2) + +# 按标签过滤 +links = await container.get_children_elements(max_depth=4, tag_filter=['a']) + +# 横向遍历 +active = await tab.find(class_name='item-active') +siblings = await active.get_siblings_elements() +link_siblings = await active.get_siblings_elements(tag_filter=['a']) +``` + +性能与正确性提示: + +- DOM 是树结构:深度增加会迅速扩展宽度。优先使用较小的 max_depth,并结合 tag_filter 限制范围。 +- 顺序:子元素遵循文档顺序;兄弟元素遵循父节点的子元素顺序,便于稳定迭代。 +- iFrame:每个 iframe 是独立的 DOM 树。使用 `tab.get_frame(iframe_element)` 进入后,再在该 frame 内调用这些助手。 +- 大型文档:深层遍历可能访问大量节点。建议将浅层遍历与基于锚点的精确 `find()`/`query()` 结合,以获得更佳性能。 + ## 原生 Cloudflare 验证码绕过 !!! 警告“关于验证码绕过的重要信息” @@ -400,7 +485,6 @@ Pydoll 的异步架构允许您同时抓取多个页面或网站,以实现最 ```python import asyncio -from functools import partial from pydoll.browser.chromium import Chrome async def scrape_page(browser, url): @@ -444,11 +528,8 @@ async def main(): # Start the browser once await browser.start() - # Create partial function with browser parameter - scrape_with_browser = partial(scrape_page, browser) - # Process all URLs concurrently using the same browser - results = await asyncio.gather(*(scrape_with_browser(url) for url in urls)) + results = await asyncio.gather(*(scrape_page(browser, url) for url in urls)) # Print results for result in results: @@ -723,6 +804,379 @@ asyncio.run(comprehensive_network_monitoring()) - **调试**:识别失败的请求和网络问题 - **安全测试**:分析请求/响应模式 +## 浏览器上下文 HTTP 请求 + +Pydoll 通过 `tab.request` 属性提供了类似 `requests` 的强大接口,使 HTTP 请求在浏览器的 JavaScript 上下文中执行。这种混合模式将 Python `requests` 的熟悉体验与浏览器上下文执行的优势结合起来。 + +### 关键优势 + +- **继承浏览器会话状态**:自动包含 Cookie、认证和会话数据 +- **符合 CORS**:请求源自浏览器上下文,避免跨域限制 +- **非常适合 SPA**:适配大量使用 JavaScript 与动态认证的单页应用 +- **无需会话搬运**:不必在自动化与 API 客户端之间转移 Cookie 或 Token + +### 基本 HTTP 方法 + +所有标准 HTTP 方法都以熟悉的接口提供: + +```python +import asyncio +from pydoll.browser.chromium import Chrome + +async def 
browser_requests_example(): + async with Chrome() as browser: + tab = await browser.start() + + # 先建立会话上下文 + await tab.go_to('https://api.example.com') + + # GET 请求 + response = await tab.request.get('https://api.example.com/users') + print(f"Status: {response.status_code}") + print(f"Data: {response.json()}") + + # POST 请求(JSON) + user_data = {"name": "John Doe", "email": "john@example.com"} + response = await tab.request.post( + 'https://api.example.com/users', + json=user_data + ) + + # 带自定义头的 PUT 请求 + response = await tab.request.put( + 'https://api.example.com/users/123', + json=user_data, + headers={'X-Custom-Header': 'value'} + ) + + # DELETE 请求 + response = await tab.request.delete('https://api.example.com/users/123') + +asyncio.run(browser_requests_example()) +``` + +### 响应对象接口 + +响应对象与 Python `requests` 库接口一致: + +```python +async def response_handling_example(): + async with Chrome() as browser: + tab = await browser.start() + await tab.go_to('https://api.example.com') + + response = await tab.request.get('https://api.example.com/data') + + # 状态信息 + print(f"Status Code: {response.status_code}") + print(f"OK: {response.ok}") # 2xx/3xx 为 True + + # 响应内容 + print(f"Raw content: {response.content}") # bytes + print(f"Text content: {response.text}") # str + print(f"JSON data: {response.json()}") # dict/list + + # 响应头 + print(f"Response headers: {response.headers}") + print(f"Content-Type: {response.headers.get('content-type')}") + + # 实际发送的请求头 + print(f"Request headers: {response.request_headers}") + + # 响应设置的 Cookies + for cookie in response.cookies: + print(f"Cookie: {cookie.name}={cookie.value}") + + # 重定向后的最终 URL + print(f"Final URL: {response.url}") + + # 为 HTTP 错误抛出异常 + response.raise_for_status() # 4xx/5xx 抛出 HTTPError + +asyncio.run(response_handling_example()) +``` + +### 高级请求配置 + +使用完整的 HTTP 选项配置请求: + +```python +async def advanced_requests_example(): + async with Chrome() as browser: + tab = await browser.start() + await 
tab.go_to('https://api.example.com') + + # 复杂 POST,包含所有选项 + response = await tab.request.post( + 'https://api.example.com/submit', + json={ + "user": "test", + "action": "create" + }, + headers={ + 'Authorization': 'Bearer token-123', + 'X-API-Version': '2.0', + 'Content-Language': 'en-US' + }, + params={ + 'format': 'json', + 'version': '2' + } + ) + + # 表单提交 + form_response = await tab.request.post( + 'https://api.example.com/form', + data={ + 'username': 'testuser', + 'password': 'secret123' + }, + headers={'Content-Type': 'application/x-www-form-urlencoded'} + ) + + # 文件上传模拟 + file_response = await tab.request.post( + 'https://api.example.com/upload', + data={'file_content': 'base64-encoded-data'}, + headers={'Content-Type': 'multipart/form-data'} + ) + +asyncio.run(advanced_requests_example()) +``` + +### 混合自动化工作流 + +将 UI 自动化与直接 API 调用结合,以获得最大效率: + +```python +async def hybrid_automation_example(): + async with Chrome() as browser: + tab = await browser.start() + + # 第一步:基于 UI 的登录(处理复杂认证流程) + await tab.go_to('https://app.example.com/login') + + username_field = await tab.find(id='username') + password_field = await tab.find(id='password') + login_button = await tab.find(id='login-btn') + + await username_field.type_text('admin@example.com') + await password_field.type_text('secure_password') + await login_button.click() + + # 等待登录跳转 + await asyncio.sleep(3) + + # 第二步:利用继承的认证调用 API(无需手动提取 Token) + dashboard_response = await tab.request.get('https://app.example.com/api/dashboard') + dashboard_data = dashboard_response.json() + + # 批量操作(比 UI 更快) + for item_id in dashboard_data.get('item_ids', []): + update_response = await tab.request.put( + f'https://app.example.com/api/items/{item_id}', + json={'status': 'processed', 'updated_by': 'automation'} + ) + print(f"Updated item {item_id}: {update_response.status_code}") + + # 第三步:回到 UI 验证 + await tab.go_to('https://app.example.com/dashboard') + + updated_items = await tab.find(class_name='item-status', find_all=True) 
+ for item in updated_items: + status = await item.text + print(f"UI shows item status: {status}") + +asyncio.run(hybrid_automation_example()) +``` + +这种浏览器上下文 HTTP 接口让 Pydoll 在现代 Web 自动化中更具优势,打破了传统的 UI 自动化与 API 交互之间的边界。 + +## 自定义浏览器首选项 + +Pydoll 通过 `ChromiumOptions.browser_preferences` 提供对 Chromium 内部首选项系统的直接访问。你可以根据 Chromium 源码中可用的设置配置浏览器,实现对浏览器行为的精细控制。 + +### 工作原理 + +Chromium 首选项使用点号分隔的键映射到嵌套的 Python 字典。每个 `.` 都表示一个新的字典层级。 + +源码参考:[chromium 的 pref_names.cc](https://chromium.googlesource.com/chromium/src/+/4aaa9f29d8fe5eac55b8632fa8fcb05a68d9005b/chrome/common/pref_names.cc) + +### 从源码构建首选项 + +```cpp +// 来自 Chromium 源码(pref_names.cc) +const char kDownloadDefaultDirectory[] = "download.default_directory"; +const char kPromptForDownload[] = "download.prompt_for_download"; +const char kSearchSuggestEnabled[] = "search.suggest_enabled"; +const char kSiteEngagementLastUpdateTime[] = "profile.last_engagement_time"; +const char kNewTabPageLocationOverride[] = "newtab_page_location_override"; +``` + +转换为 Python 字典: + +```python +from pydoll.browser.options import ChromiumOptions + +options = ChromiumOptions() +options.browser_preferences = { + 'download': { + 'default_directory': '/tmp/downloads', + 'prompt_for_download': False + }, + 'search': { + 'suggest_enabled': False + }, + 'profile': { + 'last_engagement_time': 1640995200 # timestamp + }, + 'newtab_page_location_override': 'https://www.google.com' +} +``` + +### 重要配置示例 + +#### 性能优化 + +```python +from pydoll.browser.chromium import Chrome +from pydoll.browser.options import ChromiumOptions + +options = ChromiumOptions() +options.browser_preferences = { + # 关闭网络预测和预取 + 'net': { + 'network_prediction_options': 2 # Never predict + }, + # 为速度关闭图片加载 + 'webkit': { + 'webprefs': { + 'loads_images_automatically': False, + 'plugins_enabled': False + } + }, + # 关闭错误页建议 + 'alternate_error_pages': { + 'enabled': False + } +} +``` + +#### 隐身自动化 + +```python +import time +from pydoll.browser.options import ChromiumOptions + 
+options = ChromiumOptions() +fake_timestamp = int(time.time()) - (90 * 24 * 60 * 60) # 90 天前 + +options.browser_preferences = { + # 模拟真实的浏览器使用历史 + 'profile': { + 'last_engagement_time': fake_timestamp, + 'exited_cleanly': True, + 'exit_type': 'Normal' + }, + # 覆盖新标签页 + 'newtab_page_location_override': 'https://www.google.com', + # 禁用遥测 + 'user_experience_metrics': { + 'reporting_enabled': False + } +} +``` + +#### 隐私与安全 + +```python +from pydoll.browser.options import ChromiumOptions + +options = ChromiumOptions() +options.browser_preferences = { + # 隐私设置 + 'enable_do_not_track': True, + 'enable_referrers': False, + 'safebrowsing': { + 'enabled': False + }, + # 关闭数据收集 + 'profile': { + 'password_manager_enabled': False + }, + 'autofill': { + 'enabled': False + }, + 'search': { + 'suggest_enabled': False + } +} +``` + +#### 下载与界面 + +```python +from pydoll.browser.options import ChromiumOptions + +options = ChromiumOptions() +options.browser_preferences = { + # 静默下载 + 'download': { + 'default_directory': '/tmp/automation-downloads', + 'prompt_for_download': False + }, + # 会话行为 + 'session': { + 'restore_on_startup': 5, # Open New Tab Page + 'startup_urls': ['about:blank'] + }, + # 首页 + 'homepage': 'https://www.google.com', + 'homepage_is_newtabpage': False +} +``` + +### 便捷方法 + +对于常见场景,你可以结合便捷方法与直接首选项: + +```python +from pydoll.browser.options import ChromiumOptions + +options = ChromiumOptions() + +# 下载管理 +options.set_default_download_directory('/tmp/downloads') +options.prompt_for_download = False +options.allow_automatic_downloads = True + +# 内容拦截与隐私 +options.block_notifications = True +options.block_popups = True +options.password_manager_enabled = False + +# 国际化 +options.set_accept_languages('pt-BR,en-US') +# PDF 与文件处理 +options.open_pdf_externally = True + +# 直接首选项(高级设置) +options.browser_preferences = { + 'net': {'network_prediction_options': 2}, + 'enable_do_not_track': True +} +``` + +### 影响与收益 + +- **性能**:通过禁用图片、预测和不必要的功能,可实现 3–5 倍更快的页面加载 +- 
**隐身**:构造更真实的浏览器指纹,绕过自动化检测 +- **隐私**:全面控制数据收集、跟踪与遥测 +- **自动化**:消除打断自动化流程的弹窗与提示 +- **企业**:配置数百项过去只有组策略才能控制的设置 + +这种对 Chromium 首选项系统的直接访问让你拥有与企业管理员、扩展开发者同级别的控制力,使复杂的浏览器定制在自动化脚本中成为可能。 + ## 上传文件支持 在您的自动化脚本中无缝上传文件: @@ -833,7 +1287,6 @@ asyncio.run(proxy_example()) ## 使用iFrames -Pydoll提供了 Pydoll 通过 `get_frame()` 方法提供无缝的 iframe 交互: ```python diff --git a/public/index.html b/public/index.html index f7533247..612186d5 100644 --- a/public/index.html +++ b/public/index.html @@ -4,14 +4,14 @@ - Pydoll - Automate the Web, naturally + Pydoll - scraping, the easier way - + @@ -22,7 +22,7 @@ - + @@ -73,7 +73,7 @@ "@type": "WebPage", "@id": "https://pydoll.tech/#webpage", "url": "https://pydoll.tech/", - "name": "Pydoll - Automate the Web, naturally", + "name": "Pydoll - scraping, the easier way", "isPartOf": { "@id": "https://pydoll.tech/#website" }, "about": { "@id": "https://pydoll.tech/#software" }, "description": "Pydoll is a Python CDP browser automation library for web scraping, with zero configuration, async performance, and intuitive API." @@ -245,7 +245,7 @@

- Pydoll: automate the web, naturally + Pydoll: scraping, the easier way

Built from scratch with zero configuration complexity, Pydoll connects directly to the Chrome DevTools Protocol. @@ -271,18 +271,14 @@

-
async def extract_github_data():
-   async with Chrome() as browser:
-      tab = await browser.start()
-      await tab.go_to('https://github.com/autoscrape-labs/pydoll')
-      # Extract data with readable code
-      stars = await (await tab.find(
-        id='repo-stars-counter-star')
-      ).text
-      description = await (await tab.query(
-        '//h2[contains(text(), "About")]/following-sibling::p')
-      ).text
-      print(f"⭐ Stars: {stars}", f"📝 Description: {description}")
+
✨ Simple, powerful, async diff --git a/pydoll/browser/chromium/base.py b/pydoll/browser/chromium/base.py index 800540c6..06c88a1b 100644 --- a/pydoll/browser/chromium/base.py +++ b/pydoll/browser/chromium/base.py @@ -28,10 +28,10 @@ from pydoll.exceptions import ( BrowserNotRunning, FailedToStartBrowser, - NoValidTabFound, + InvalidConnectionPort, InvalidWebSocketAddress, MissingTargetOrWebSocket, - InvalidConnectionPort, + NoValidTabFound, ) from pydoll.protocol.base import Command, Response, T_CommandParams, T_CommandResponse from pydoll.protocol.browser.methods import ( @@ -250,7 +250,9 @@ async def new_tab(self, url: str = '', browser_context_id: Optional[str] = None) ) target_id = response['result']['targetId'] tab = Tab(self, **self._get_tab_kwargs(target_id, browser_context_id)) - if url: await tab.go_to(url) + self._tabs_opened[target_id] = tab + if url: + await tab.go_to(url) return tab async def get_targets(self) -> list[TargetInfo]: @@ -283,7 +285,9 @@ async def get_opened_tabs(self) -> list[Tab]: ] all_target_ids = [target['targetId'] for target in valid_tab_targets] existing_target_ids = list(self._tabs_opened.keys()) - remaining_target_ids = [target_id for target_id in all_target_ids if target_id not in existing_target_ids] + remaining_target_ids = [ + target_id for target_id in all_target_ids if target_id not in existing_target_ids + ] existing_tabs = [self._tabs_opened[target_id] for target_id in existing_target_ids] new_tabs = [ Tab(self, **self._get_tab_kwargs(target_id)) @@ -714,7 +718,9 @@ def _validate_ws_address(ws_address: str): if not ws_address.startswith('ws://'): raise InvalidWebSocketAddress('WebSocket address must start with ws://') if len(ws_address.split('/')) < min_slashes: - raise InvalidWebSocketAddress(f'WebSocket address must contain at least {min_slashes} slashes') + raise InvalidWebSocketAddress( + f'WebSocket address must contain at least {min_slashes} slashes' + ) async def _setup_ws_address(self, ws_address: str): """Setup 
WebSocket address for browser.""" @@ -737,13 +743,13 @@ def _get_tab_kwargs(self, target_id: str, browser_context_id: Optional[str] = No Dict of kwargs for creating a tab. """ kwargs: dict[str, Any] = { - "target_id": target_id, - "browser_context_id": browser_context_id, + 'target_id': target_id, + 'browser_context_id': browser_context_id, } if self._ws_address: - kwargs["ws_address"] = self._get_tab_ws_address(target_id) + kwargs['ws_address'] = self._get_tab_ws_address(target_id) else: - kwargs["connection_port"] = self._connection_port + kwargs['connection_port'] = self._connection_port return kwargs def _get_tab_ws_address(self, tab_id: str) -> str: diff --git a/pydoll/browser/tab.py b/pydoll/browser/tab.py index aa18be2c..3c11116c 100644 --- a/pydoll/browser/tab.py +++ b/pydoll/browser/tab.py @@ -46,6 +46,7 @@ NoDialogPresent, NotAnIFrame, PageLoadTimeout, + TopLevelTargetRequired, WaitElementTimeout, ) from pydoll.protocol.base import EmptyResponse, Response @@ -252,7 +253,7 @@ async def enable_intercept_file_chooser_dialog(self): async def enable_auto_solve_cloudflare_captcha( self, custom_selector: Optional[tuple[By, str]] = None, - time_before_click: int = 2, + time_before_click: int = 5, time_to_wait_captcha: int = 5, ): """ @@ -527,7 +528,15 @@ async def take_screenshot( capture_beyond_viewport=beyond_viewport, ) ) - screenshot_data = response['result']['data'] + + try: + screenshot_data = response['result']['data'] + except KeyError: + raise TopLevelTargetRequired( + 'Command can only be executed on top-level targets. Please use ' + 'take_screenshot method on the WebElement object instead.' 
+ ) + if as_base64: return screenshot_data diff --git a/pydoll/constants.py b/pydoll/constants.py index d8c876b4..f0e3e632 100644 --- a/pydoll/constants.py +++ b/pydoll/constants.py @@ -152,6 +152,53 @@ class Scripts: } """ + GET_CHILDREN_NODE = """ + function() {{ + function getChildrenUntilDepth(element, maxDepth, tagFilter = [], currentDepth = 1) + {{ + if (currentDepth > maxDepth) return []; + + const children = Array.from(element.children); + let filtered = tagFilter.length === 0 + ? children + : children.filter(child => tagFilter.includes(child.tagName.toLowerCase())); + + let allDescendants = [...filtered]; + + for (let child of children) + {{ + allDescendants.push( + ...getChildrenUntilDepth(child, maxDepth, tagFilter, currentDepth + 1) + ); + }} + + return allDescendants; + }} + + return getChildrenUntilDepth(this, {max_depth}, {tag_filter}); + }} + """ + + GET_SIBLINGS_NODE = """ + function() {{ + function getSiblingsUntilDepth(element, tagFilter = []) + {{ + const parent = element.parentElement; + const siblings = Array.from(parent.children); + let filtered = tagFilter.length === 0 + ? siblings.filter(child => child !== element) + : siblings.filter(child => + tagFilter.includes(child.tagName.toLowerCase()) && child !== element); + + let allDescendants = [...filtered]; + + return allDescendants; + }} + + return getSiblingsUntilDepth(this, {tag_filter}); + }} + """ + MAKE_REQUEST = """ (async function() {{ async function makeRequest(url, options) {{ diff --git a/pydoll/elements/mixins/find_elements_mixin.py b/pydoll/elements/mixins/find_elements_mixin.py index dd8ebe98..91cd6b38 100644 --- a/pydoll/elements/mixins/find_elements_mixin.py +++ b/pydoll/elements/mixins/find_elements_mixin.py @@ -59,7 +59,7 @@ async def find( timeout: int = ..., find_all: Literal[False] = False, raise_exc: Literal[True] = True, - **attributes: dict[str, str], + **attributes, ) -> 'WebElement': ... 
@overload @@ -73,7 +73,7 @@ async def find( timeout: int = ..., find_all: Literal[True] = True, raise_exc: Literal[True] = True, - **attributes: dict[str, str], + **attributes, ) -> list['WebElement']: ... @overload @@ -87,7 +87,7 @@ async def find( timeout: int = ..., find_all: Literal[True] = True, raise_exc: Literal[False] = False, - **attributes: dict[str, str], + **attributes, ) -> Optional[list['WebElement']]: ... @overload @@ -101,7 +101,7 @@ async def find( timeout: int = ..., find_all: Literal[False] = False, raise_exc: Literal[False] = False, - **attributes: dict[str, str], + **attributes, ) -> Optional['WebElement']: ... @overload @@ -115,7 +115,7 @@ async def find( timeout: int = ..., find_all: bool = ..., raise_exc: bool = ..., - **attributes: dict[str, str], + **attributes, ) -> Union['WebElement', list['WebElement'], None]: ... async def find( @@ -478,7 +478,7 @@ def _get_expression_type(expression: str) -> By: - XPath: starts with ./, or / - Default: CSS_SELECTOR """ - if expression.startswith('./') or expression.startswith('/'): + if expression.startswith(('./', '/', '(/')): return By.XPATH return By.CSS_SELECTOR diff --git a/pydoll/elements/web_element.py b/pydoll/elements/web_element.py index c1f74301..f0022c62 100644 --- a/pydoll/elements/web_element.py +++ b/pydoll/elements/web_element.py @@ -36,6 +36,7 @@ ) from pydoll.protocol.page.methods import CaptureScreenshotResponse from pydoll.protocol.page.types import ScreenshotFormat, Viewport +from pydoll.protocol.runtime.methods import GetPropertiesResponse from pydoll.utils import ( decode_base64_to_bytes, extract_text_from_html, @@ -143,6 +144,57 @@ async def get_parent_element(self) -> 'WebElement': attributes = await self._get_object_attributes(object_id=object_id) return WebElement(object_id, self._connection_handler, attributes_list=attributes) + async def get_children_elements( + self, max_depth: int = 1, tag_filter: list[str] = [], raise_exc: bool = False + ) -> list['WebElement']: + """ + 
Retrieve all direct and nested child elements of this element. + + Args: + max_depth (int, optional): Maximum depth to traverse when finding children. + Defaults to 1 for direct children only. + tag_filter (list[str], optional): List of HTML tag names to filter results. + If empty, returns all child elements regardless of tag. Defaults to []. + + Returns: + list[WebElement]: List of child WebElement objects found within the specified + depth and matching the tag filter criteria. + + Raises: + ElementNotFound: If no child elements are found for this element and raise_exc is True. + """ + children = await self._get_family_elements( + script=Scripts.GET_CHILDREN_NODE, max_depth=max_depth, tag_filter=tag_filter + ) + if not children and raise_exc: + raise ElementNotFound(f'Child element not found for element: {self}') + return children + + async def get_siblings_elements( + self, tag_filter: list[str] = [], raise_exc: bool = False + ) -> list['WebElement']: + """ + Retrieve all sibling elements of this element (elements at the same DOM level). + + Args: + tag_filter (list[str], optional): List of HTML tag names to filter results. + If empty, returns all sibling elements regardless of tag. Defaults to []. + + Returns: + list[WebElement]: List of sibling WebElement objects that share the same + parent as this element and match the tag filter criteria. + + Raises: + ElementNotFound: If no sibling elements are found for this element + and raise_exc is True. + """ + siblings = await self._get_family_elements( + script=Scripts.GET_SIBLINGS_NODE, tag_filter=tag_filter + ) + if not siblings and raise_exc: + raise ElementNotFound(f'Sibling element not found for element: {self}') + return siblings + async def take_screenshot(self, path: str, quality: int = 100): """ Capture screenshot of this element only. 
@@ -435,6 +487,46 @@ async def execute_script(self, script: str, return_by_value: bool = False): ) ) + async def _get_family_elements( + self, script: str, max_depth: int = 1, tag_filter: list[str] = [] + ) -> list['WebElement']: + """ + Retrieve all family elements of this element (elements at the same DOM level). + + Args: + script (str): CDP script to execute for retrieving family elements. + tag_filter (list[str], optional): List of HTML tag names to filter results. + If empty, returns all family elements regardless of tag. Defaults to []. + + Returns: + list[WebElement]: List of family WebElement objects that share the same + parent as this element and match the tag filter criteria. + """ + result = await self.execute_script( + script.format(tag_filter=tag_filter, max_depth=max_depth) + ) + if not self._has_object_id_key(result): + return [] + + array_object_id = result['result']['result']['objectId'] + + get_properties_command = RuntimeCommands.get_properties(object_id=array_object_id) + properties_response: GetPropertiesResponse = await self._execute_command( + get_properties_command + ) + + family_elements: list['WebElement'] = [] + for prop in properties_response['result']['result']: + if not (prop['name'].isdigit() and 'objectId' in prop['value']): + continue + child_object_id = prop['value']['objectId'] + attributes = await self._get_object_attributes(object_id=child_object_id) + family_elements.append( + WebElement(child_object_id, self._connection_handler, attributes_list=attributes) + ) + + return family_elements + def _def_attributes(self, attributes_list: list[str]): """Process flat attribute list into dictionary (renames 'class' to 'class_name').""" for i in range(0, len(attributes_list), 2): diff --git a/pydoll/exceptions.py b/pydoll/exceptions.py index 08e8e409..f4746230 100644 --- a/pydoll/exceptions.py +++ b/pydoll/exceptions.py @@ -103,6 +103,12 @@ class ProtocolException(PydollException): message = 'A protocol error occurred' +class 
TopLevelTargetRequired(ProtocolException): + """Raised when a command can only be executed on top-level targets.""" + + message = 'Command can only be executed on top-level targets.' + + class InvalidCommand(ProtocolException): """Raised when an invalid command is sent to the browser.""" diff --git a/pyproject.toml b/pyproject.toml index ce5e576e..45cfb65c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pydoll-python" -version = "2.7.0" +version = "2.8.1" description = "Pydoll is a library for automating chromium-based browsers without a WebDriver, offering realistic interactions." authors = ["Thalison Fernandes "] readme = "README.md" diff --git a/tests/pages/test_children.html b/tests/pages/test_children.html new file mode 100644 index 00000000..72c1a97a --- /dev/null +++ b/tests/pages/test_children.html @@ -0,0 +1,28 @@ + + + + + + Test Children Elements + + +
+
Child 1
+ Child 2 +

Child 3

+ Link 1 + Link 2 +
+
Nested Child 1
+ Nested Child 2 + Nested Link +
+
+ +
+ + + Another Link +
+ + diff --git a/tests/test_browser/test_browser_base.py b/tests/test_browser/test_browser_base.py index 5302f33d..8e9a48cc 100644 --- a/tests/test_browser/test_browser_base.py +++ b/tests/test_browser/test_browser_base.py @@ -25,6 +25,10 @@ ) from pydoll.protocol.fetch.events import FetchEvent from pydoll.connection.connection_handler import ConnectionHandler +from pydoll.exceptions import ( + MissingTargetOrWebSocket, + InvalidWebSocketAddress, +) from pydoll.protocol.network.types import RequestMethod, ErrorReason from pydoll.protocol.browser.types import DownloadBehavior, PermissionType @@ -227,8 +231,8 @@ async def test_new_tab_uses_ws_base_when_ws_address_present(mock_browser): assert isinstance(tab, Tab) assert tab._ws_address == 'ws://127.0.0.1:9222/devtools/page/new_page' - # When ws_address is used, target_id is resolved from ws when needed - assert tab._target_id is None + # When ws_address is used, target_id can be known from create_target response + assert tab._target_id == 'new_page' @pytest.mark.asyncio @@ -341,23 +345,23 @@ async def test_get_window_id_for_target(mock_browser): async def test_get_window_id_for_tab_raises_when_no_target_id_and_no_ws(mock_browser): # Tab created only with connection_port, without target_id and ws tab = Tab(mock_browser, connection_port=9222) - with pytest.raises(ValueError, match='Tab has no target ID or WebSocket address'): + with pytest.raises(MissingTargetOrWebSocket): await mock_browser.get_window_id_for_tab(tab) def test__validate_ws_address_raises_on_invalid_scheme(): - with pytest.raises(ValueError, match='must start with ws://'): + with pytest.raises(InvalidWebSocketAddress): Browser._validate_ws_address('http://localhost:9222/devtools/browser/abc') def test__validate_ws_address_raises_on_insufficient_slashes(): - with pytest.raises(ValueError, match='must contain at least 4 slashes'): + with pytest.raises(InvalidWebSocketAddress): Browser._validate_ws_address('ws://localhost') def 
test__get_tab_ws_address_raises_when_ws_not_set(mock_browser): mock_browser._ws_address = None - with pytest.raises(ValueError, match='WebSocket address is not set'): + with pytest.raises(InvalidWebSocketAddress): mock_browser._get_tab_ws_address('some-tab') @@ -899,7 +903,7 @@ async def test_get_opened_tabs_success(mock_browser): """Test get_opened_tabs with multiple valid tabs.""" # Mock get_targets to return various target types mock_targets = [ - {'targetId': 'tab1', 'type': 'page', 'url': 'https://example.com', 'title': 'Example Site'}, + {'targetId': 'tab3', 'type': 'page', 'url': 'https://example.com', 'title': 'Example Site'}, { 'targetId': 'ext1', 'type': 'page', @@ -913,14 +917,11 @@ async def test_get_opened_tabs_success(mock_browser): 'url': 'chrome://background', 'title': 'Background Page', }, - {'targetId': 'tab3', 'type': 'page', 'url': 'chrome://newtab/', 'title': 'New Tab'}, + {'targetId': 'tab1', 'type': 'page', 'url': 'chrome://newtab/', 'title': 'New Tab'}, ] mock_browser.get_targets = AsyncMock(return_value=mock_targets) - # Clear Tab singleton registry to avoid conflicts - Tab._instances.clear() - tabs = await mock_browser.get_opened_tabs() # Should return 3 tabs (excluding extension and background_page) @@ -931,7 +932,7 @@ async def test_get_opened_tabs_success(mock_browser): assert isinstance(tab, Tab) # Verify target IDs are correct (should be in reversed order) - expected_target_ids = ['tab3', 'tab2', 'tab1'] # reversed order + expected_target_ids = ['tab1', 'tab2', 'tab3'] # reversed order actual_target_ids = [tab._target_id for tab in tabs] assert actual_target_ids == expected_target_ids @@ -966,9 +967,6 @@ async def test_get_opened_tabs_no_valid_tabs(mock_browser): mock_browser.get_targets = AsyncMock(return_value=mock_targets) - # Clear Tab singleton registry - Tab._instances.clear() - tabs = await mock_browser.get_opened_tabs() # Should return empty list @@ -983,9 +981,6 @@ async def test_get_opened_tabs_empty_targets(mock_browser): 
"""Test get_opened_tabs when no targets exist.""" mock_browser.get_targets = AsyncMock(return_value=[]) - # Clear Tab singleton registry - Tab._instances.clear() - tabs = await mock_browser.get_opened_tabs() assert len(tabs) == 0 @@ -1016,9 +1011,6 @@ async def test_get_opened_tabs_filters_extensions(mock_browser): mock_browser.get_targets = AsyncMock(return_value=mock_targets) - # Clear Tab singleton registry - Tab._instances.clear() - tabs = await mock_browser.get_opened_tabs() # Should return only 2 tabs (excluding extensions) @@ -1057,9 +1049,6 @@ async def test_get_opened_tabs_filters_non_page_types(mock_browser): mock_browser.get_targets = AsyncMock(return_value=mock_targets) - # Clear Tab singleton registry - Tab._instances.clear() - tabs = await mock_browser.get_opened_tabs() # Should return only 2 tabs (only 'page' type) @@ -1075,34 +1064,6 @@ async def test_get_opened_tabs_filters_non_page_types(mock_browser): assert actual_target_ids == expected_target_ids -@pytest.mark.asyncio -async def test_get_opened_tabs_singleton_behavior(mock_browser): - """Test that get_opened_tabs respects Tab singleton pattern.""" - mock_targets = [ - {'targetId': 'tab1', 'type': 'page', 'url': 'https://example.com', 'title': 'Example Site'}, - {'targetId': 'tab2', 'type': 'page', 'url': 'https://google.com', 'title': 'Google'}, - ] - - mock_browser.get_targets = AsyncMock(return_value=mock_targets) - - # Clear Tab singleton registry - Tab._instances.clear() - - # First call - tabs1 = await mock_browser.get_opened_tabs() - - # Second call with same targets - tabs2 = await mock_browser.get_opened_tabs() - - # Should return same instances due to singleton pattern - assert len(tabs1) == len(tabs2) == 2 - - # Verify singleton behavior - same target_id should return same instance - for tab1, tab2 in zip(tabs1, tabs2): - if tab1._target_id == tab2._target_id: - assert tab1 is tab2 # Same object reference - - @pytest.mark.asyncio async def 
test_get_opened_tabs_order_is_reversed(mock_browser): """Test that get_opened_tabs returns tabs in reversed order (most recent first).""" @@ -1129,9 +1090,6 @@ async def test_get_opened_tabs_order_is_reversed(mock_browser): mock_browser.get_targets = AsyncMock(return_value=mock_targets) - # Clear Tab singleton registry - Tab._instances.clear() - tabs = await mock_browser.get_opened_tabs() # Should return in reversed order (newest first) @@ -1180,9 +1138,6 @@ async def test_get_opened_tabs_with_mixed_valid_invalid_targets(mock_browser): mock_browser.get_targets = AsyncMock(return_value=mock_targets) - # Clear Tab singleton registry - Tab._instances.clear() - tabs = await mock_browser.get_opened_tabs() # Should return only 3 valid tabs @@ -1204,9 +1159,6 @@ async def test_get_opened_tabs_integration_with_new_tab(mock_browser): # Mock initial targets (empty) mock_browser.get_targets = AsyncMock(return_value=[]) - # Clear Tab singleton registry - Tab._instances.clear() - # Initially no tabs tabs = await mock_browser.get_opened_tabs() assert len(tabs) == 0 @@ -1237,8 +1189,8 @@ async def test_get_opened_tabs_integration_with_new_tab(mock_browser): assert len(tabs) == 1 assert tabs[0]._target_id == 'new_tab_1' - # Due to singleton pattern, should be the same instance - assert tabs[0] is new_tab + # Without singleton, instance identity can differ but ids should match + assert tabs[0]._target_id == new_tab._target_id @pytest.mark.asyncio diff --git a/tests/test_browser/test_browser_chrome.py b/tests/test_browser/test_browser_chrome.py index 358182fb..ea886486 100644 --- a/tests/test_browser/test_browser_chrome.py +++ b/tests/test_browser/test_browser_chrome.py @@ -6,7 +6,7 @@ from pydoll.browser.chromium.chrome import Chrome from pydoll.browser.options import ChromiumOptions -from pydoll.exceptions import InvalidBrowserPath, UnsupportedOS +from pydoll.exceptions import InvalidBrowserPath, UnsupportedOS, InvalidConnectionPort class TestChromeInitialization: @@ -350,7 +350,7 
@@ def test_chrome_with_zero_port(self): assert chrome._connection_port in range(9223, 9323) def test_chrome_with_negative_port(self): - """Test Chrome with negative port (should raise ValueError).""" + """Test Chrome with negative port (should raise InvalidConnectionPort).""" with patch.multiple( Chrome, _get_default_binary_location=MagicMock(return_value='/fake/chrome'), @@ -367,7 +367,7 @@ def test_chrome_with_negative_port(self): 'pydoll.browser.managers.proxy_manager.ProxyManager', autospec=True, ): - with pytest.raises(ValueError, match='Connection port must be a positive integer'): + with pytest.raises(InvalidConnectionPort): Chrome(connection_port=-1) diff --git a/tests/test_browser/test_browser_edge.py b/tests/test_browser/test_browser_edge.py index 9d66d9d9..1447d06d 100644 --- a/tests/test_browser/test_browser_edge.py +++ b/tests/test_browser/test_browser_edge.py @@ -6,7 +6,7 @@ from pydoll.browser.chromium.edge import Edge from pydoll.browser.managers import ChromiumOptionsManager from pydoll.browser.options import ChromiumOptions -from pydoll.exceptions import UnsupportedOS, InvalidBrowserPath +from pydoll.exceptions import UnsupportedOS, InvalidBrowserPath, InvalidConnectionPort class TestEdgeInitialization: @@ -403,7 +403,7 @@ def test_edge_with_zero_port(self): assert edge._connection_port in range(9223, 9323) def test_edge_with_negative_port(self): - """Test Edge with negative port (should raise ValueError).""" + """Test Edge with negative port (should raise InvalidConnectionPort).""" with patch.multiple( Edge, _get_default_binary_location=MagicMock(return_value='/fake/edge'), @@ -420,7 +420,7 @@ def test_edge_with_negative_port(self): 'pydoll.browser.managers.proxy_manager.ProxyManager', autospec=True, ): - with pytest.raises(ValueError, match='Connection port must be a positive integer'): + with pytest.raises(InvalidConnectionPort): Edge(connection_port=-1) def test_edge_with_edge_specific_arguments(self): diff --git 
a/tests/test_browser/test_browser_tab.py b/tests/test_browser/test_browser_tab.py index 2099e6e6..515e499a 100644 --- a/tests/test_browser/test_browser_tab.py +++ b/tests/test_browser/test_browser_tab.py @@ -12,7 +12,7 @@ from pydoll.browser.tab import Tab from pydoll.protocol.browser.events import BrowserEvent from pydoll.protocol.browser.types import DownloadBehavior -from pydoll.exceptions import DownloadTimeout +from pydoll.exceptions import DownloadTimeout, InvalidTabInitialization from pydoll.exceptions import ( NoDialogPresent, PageLoadTimeout, @@ -50,20 +50,15 @@ async def mock_browser(): @pytest_asyncio.fixture async def tab(mock_browser, mock_connection_handler): """Tab fixture with mocked dependencies.""" - # Clear singleton registry before each test - Tab._instances.clear() - - # Generate unique target_id for each test to avoid singleton conflicts unique_target_id = f'test-target-{uuid.uuid4().hex[:8]}' - with patch('pydoll.browser.tab.ConnectionHandler', return_value=mock_connection_handler): - tab = Tab( + created = Tab( browser=mock_browser, connection_port=9222, target_id=unique_target_id, browser_context_id='test-context-id' ) - return tab + return created def assert_mock_called_at_least_once(mock_obj, method_name='execute_command'): @@ -78,9 +73,8 @@ def assert_mock_called_at_least_once(mock_obj, method_name='execute_command'): @pytest.fixture(autouse=True) def cleanup_tab_registry(): - """Automatically clean up Tab singleton registry after each test.""" + """No-op: singleton removed; keep fixture for compatibility.""" yield - Tab._instances.clear() class TestTabInitialization: @@ -90,7 +84,7 @@ def test_tab_initialization(self, tab, mock_browser): """Test basic Tab initialization.""" assert tab._browser == mock_browser assert tab._connection_port == 9222 - assert tab._target_id.startswith('test-target-') # Now using unique IDs + assert tab._target_id.startswith('test-target-') assert tab._browser_context_id == 'test-context-id' assert not 
tab.page_events_enabled assert not tab.network_events_enabled @@ -99,16 +93,8 @@ def test_tab_initialization(self, tab, mock_browser): assert not tab.runtime_events_enabled assert not tab.intercept_file_chooser_dialog_enabled - def test_tab_singleton_with_ws_address_key(self, mock_browser, mock_connection_handler): - """Existing instance should be reused when created with same ws_address key.""" - with patch('pydoll.browser.tab.ConnectionHandler', return_value=mock_connection_handler): - ws = 'ws://localhost:9222/devtools/page/AAA' - t1 = Tab(browser=mock_browser, ws_address=ws) - t2 = Tab(browser=mock_browser, ws_address=ws) - assert t1 is t2 - def test_tab_init_raises_when_no_identifiers(self, mock_browser): - with pytest.raises(ValueError, match='Either connection_port, target_id, or ws_address must be provided'): + with pytest.raises(InvalidTabInitialization): Tab(browser=mock_browser) def test_tab_properties(self, tab): @@ -1294,6 +1280,29 @@ async def test_get_frame_success(self, tab, mock_browser): assert isinstance(frame, Tab) mock_browser.get_targets.assert_called_once() + @pytest.mark.asyncio + async def test_get_frame_uses_cache_on_subsequent_calls(self, tab, mock_browser): + """Subsequent calls to get_frame should return cached Tab instance.""" + # Prepare iframe element + mock_iframe_element = MagicMock() + mock_iframe_element.tag_name = 'iframe' + frame_url = 'https://example.com/iframe' + mock_iframe_element.get_attribute.return_value = frame_url + # Prepare browser targets and cache + mock_browser.get_targets = AsyncMock(return_value=[ + {'targetId': 'iframe-target-id', 'url': frame_url, 'type': 'page'} + ]) + tab._browser._tabs_opened = {} + + with patch('pydoll.browser.tab.ConnectionHandler', autospec=True): + frame1 = await tab.get_frame(mock_iframe_element) + # Second call should reuse from cache and not create a new Tab + frame2 = await tab.get_frame(mock_iframe_element) + + assert isinstance(frame1, Tab) + assert frame1 is frame2 + assert 
tab._browser._tabs_opened['iframe-target-id'] is frame1 + @pytest.mark.asyncio async def test_get_frame_not_iframe(self, tab): """Test getting frame from non-iframe element.""" diff --git a/tests/test_browser/test_tab_request_integration.py b/tests/test_browser/test_tab_request_integration.py index 092745a0..e497c5fc 100644 --- a/tests/test_browser/test_tab_request_integration.py +++ b/tests/test_browser/test_tab_request_integration.py @@ -42,9 +42,6 @@ async def mock_browser(): @pytest_asyncio.fixture async def tab(mock_browser, mock_connection_handler): """Tab fixture with mocked dependencies.""" - # Clear singleton registry before each test - Tab._instances.clear() - # Generate unique target_id for each test to avoid singleton conflicts unique_target_id = f'test-target-{uuid.uuid4().hex[:8]}' @@ -69,9 +66,8 @@ async def tab(mock_browser, mock_connection_handler): @pytest_asyncio.fixture def cleanup_tab_registry(): - """Clean up Tab singleton registry after each test.""" + """No-op: singleton removed; keep fixture for compatibility.""" yield - Tab._instances.clear() class TestTabRequestProperty: @@ -405,20 +401,18 @@ def test_request_property_after_tab_reuse(self, mock_browser, mock_connection_ha ) request1 = tab1.request - # Second tab instance with same target_id (simulating singleton behavior) + # Second tab instance with same target_id (no singleton anymore) tab2 = Tab( browser=mock_browser, connection_port=9222, target_id=target_id, browser_context_id='test-context-reuse' ) - - # Due to singleton behavior, tab2 should be the same as tab1 - assert tab2 is tab1 - - # Request should also be the same + # With no singleton, they are different instances, but independent request is allowed + assert tab2 is not tab1 + # Request instances are created per tab; they are distinct here request2 = tab2.request - assert request2 is request1 + assert request2 is not request1 @pytest.mark.asyncio async def test_request_property_memory_efficiency(self, tab): diff --git 
a/tests/test_browser/test_tab_singleton.py b/tests/test_browser/test_tab_singleton.py deleted file mode 100644 index 1848efa1..00000000 --- a/tests/test_browser/test_tab_singleton.py +++ /dev/null @@ -1,180 +0,0 @@ -""" -Tests for Tab singleton pattern based on target_id. -""" - -import pytest -from unittest.mock import Mock, AsyncMock - -from pydoll.browser.tab import Tab - - -class TestTabSingleton: - """Tests for Tab singleton behavior.""" - - def setup_method(self): - """Clear instance registry before each test.""" - Tab._instances.clear() - - def teardown_method(self): - """Clear instance registry after each test.""" - Tab._instances.clear() - - def test_same_target_id_returns_same_instance(self): - """Test that same target_id returns the same instance.""" - # Arrange - browser = Mock() - connection_port = 9222 - target_id = "target-123" - browser_context_id = "context-456" - - # Act - tab1 = Tab(browser, connection_port, target_id, browser_context_id) - tab2 = Tab(browser, connection_port, target_id, browser_context_id) - - # Assert - assert tab1 is tab2 - assert tab1._target_id == target_id - assert tab2._target_id == target_id - - def test_different_target_ids_return_different_instances(self): - """Test that different target_ids return different instances.""" - # Arrange - browser = Mock() - connection_port = 9222 - target_id1 = "target-123" - target_id2 = "target-456" - - # Act - tab1 = Tab(browser, connection_port, target_id1) - tab2 = Tab(browser, connection_port, target_id2) - - # Assert - assert tab1 is not tab2 - assert tab1._target_id == target_id1 - assert tab2._target_id == target_id2 - - def test_get_instance_returns_existing_instance(self): - """Test that get_instance returns existing instance.""" - # Arrange - browser = Mock() - connection_port = 9222 - target_id = "target-123" - - # Act - tab = Tab(browser, connection_port, target_id) - retrieved_tab = Tab.get_instance(target_id) - - # Assert - assert retrieved_tab is tab - - def 
test_get_instance_returns_none_for_nonexistent_target(self): - """Test that get_instance returns None for non-existent target_id.""" - # Act - retrieved_tab = Tab.get_instance("nonexistent-target") - - # Assert - assert retrieved_tab is None - - def test_get_all_instances_returns_all_active_instances(self): - """Test that get_all_instances returns all active instances.""" - # Arrange - browser = Mock() - connection_port = 9222 - target_id1 = "target-123" - target_id2 = "target-456" - - # Act - tab1 = Tab(browser, connection_port, target_id1) - tab2 = Tab(browser, connection_port, target_id2) - all_instances = Tab.get_all_instances() - - # Assert - assert len(all_instances) == 2 - assert all_instances[target_id1] is tab1 - assert all_instances[target_id2] is tab2 - - def test_remove_instance_removes_from_registry(self): - """Test that _remove_instance removes instance from registry.""" - # Arrange - browser = Mock() - connection_port = 9222 - target_id = "target-123" - - # Act - tab = Tab(browser, connection_port, target_id) - assert Tab.get_instance(target_id) is tab - - Tab._remove_instance(target_id) - retrieved_tab = Tab.get_instance(target_id) - - # Assert - assert retrieved_tab is None - assert len(Tab.get_all_instances()) == 0 - - @pytest.mark.asyncio - async def test_close_removes_instance_from_registry(self): - """Test that close() removes instance from registry.""" - # Arrange - browser = Mock() - connection_port = 9222 - target_id = "target-123" - - tab = Tab(browser, connection_port, target_id) - tab._execute_command = AsyncMock(return_value={'result': 'success'}) - - # Verify instance is in registry - assert Tab.get_instance(target_id) is tab - - # Act - await tab.close() - - # Assert - assert Tab.get_instance(target_id) is None - assert len(Tab.get_all_instances()) == 0 - - def test_existing_instance_properties_are_updated(self): - """Test that existing instance properties are updated.""" - # Arrange - browser1 = Mock() - browser2 = Mock() - 
connection_port1 = 9222 - connection_port2 = 9223 - target_id = "target-123" - context1 = "context-1" - context2 = "context-2" - - # Act - tab1 = Tab(browser1, connection_port1, target_id, context1) - tab2 = Tab(browser2, connection_port2, target_id, context2) - - # Assert - assert tab1 is tab2 - assert tab1._browser is browser2 # Updated - assert tab1._connection_port == connection_port2 # Updated - assert tab1._browser_context_id == context2 # Updated - - def test_initialization_is_skipped_for_existing_instance(self): - """Test that __init__ is skipped for existing instances.""" - # Arrange - browser = Mock() - connection_port = 9222 - target_id = "target-123" - - # Act - tab1 = Tab(browser, connection_port, target_id) - original_initialized = tab1._initialized - - # Modify a property to verify __init__ is not executed again - tab1._page_events_enabled = True - - tab2 = Tab(browser, connection_port, target_id) - - # Assert - assert tab1 is tab2 - assert tab1._initialized == original_initialized - assert tab1._page_events_enabled is True # Not reset - - def test_remove_nonexistent_instance_does_not_raise_error(self): - """Test that removing non-existent instance does not raise error.""" - # Act & Assert - should not raise exception - Tab._remove_instance("nonexistent-target") \ No newline at end of file diff --git a/tests/test_find_elements_mixin.py b/tests/test_find_elements_mixin.py index c6cd7e0a..18b972b7 100644 --- a/tests/test_find_elements_mixin.py +++ b/tests/test_find_elements_mixin.py @@ -201,6 +201,16 @@ def test_edge_case_expressions(self): # Empty string should default to CSS assert FindElementsMixin._get_expression_type('') == By.CSS_SELECTOR + def test_xpath_with_parentheses_and_predicate(self): + """Test XPath detection with parentheses, e.g. 
(//div)[last()].""" + expressions = [ + '(//div)[last()]', + '(//span[@class="btn"])[1]', + '(/html/body/div)[position()=1]' + ] + for expr in expressions: + assert FindElementsMixin._get_expression_type(expr) == By.XPATH + class TestEnsureRelativeXPath: """Test the _ensure_relative_xpath static method.""" diff --git a/tests/test_web_element.py b/tests/test_web_element.py index cc9cf4bd..a8a9a27c 100644 --- a/tests/test_web_element.py +++ b/tests/test_web_element.py @@ -1,29 +1,26 @@ +import asyncio +import json +from pathlib import Path +from unittest.mock import AsyncMock, patch + import pytest import pytest_asyncio -from unittest.mock import AsyncMock, MagicMock, patch, mock_open -import json -import asyncio +from pydoll.browser.options import ChromiumOptions as Options +from pydoll.browser.chromium.chrome import Chrome +from pydoll.commands import DomCommands, RuntimeCommands +from pydoll.constants import Key +from pydoll.elements.web_element import WebElement from pydoll.exceptions import ( - ElementNotVisible, - ElementNotInteractable, - ElementNotFound, ElementNotAFileInput, + ElementNotFound, + ElementNotInteractable, + ElementNotVisible, WaitElementTimeout, ) -from pydoll.commands import ( - DomCommands, - InputCommands, - PageCommands, - RuntimeCommands, -) -from pydoll.constants import Key from pydoll.protocol.input.types import KeyModifier -from pydoll.elements.web_element import WebElement - - @pytest_asyncio.fixture async def mock_connection_handler(): """Mock connection handler for WebElement tests.""" @@ -37,11 +34,16 @@ async def mock_connection_handler(): def web_element(mock_connection_handler): """Basic WebElement fixture with common attributes.""" attributes_list = [ - 'id', 'test-id', - 'class', 'test-class', - 'value', 'test-value', - 'tag_name', 'div', - 'type', 'text' + 'id', + 'test-id', + 'class', + 'test-class', + 'value', + 'test-value', + 'tag_name', + 'div', + 'type', + 'text', ] return WebElement( object_id='test-object-id', @@ 
-56,10 +58,14 @@ def web_element(mock_connection_handler): def input_element(mock_connection_handler): """Input element fixture for form-related tests.""" attributes_list = [ - 'id', 'input-id', - 'tag_name', 'input', - 'type', 'text', - 'value', 'initial-value' + 'id', + 'input-id', + 'tag_name', + 'input', + 'type', + 'text', + 'value', + 'initial-value', ] return WebElement( object_id='input-object-id', @@ -73,11 +79,7 @@ def input_element(mock_connection_handler): @pytest.fixture def file_input_element(mock_connection_handler): """File input element fixture for file upload tests.""" - attributes_list = [ - 'id', 'file-input-id', - 'tag_name', 'input', - 'type', 'file' - ] + attributes_list = ['id', 'file-input-id', 'tag_name', 'input', 'type', 'file'] return WebElement( object_id='file-input-object-id', connection_handler=mock_connection_handler, @@ -90,11 +92,7 @@ def file_input_element(mock_connection_handler): @pytest.fixture def option_element(mock_connection_handler): """Option element fixture for dropdown tests.""" - attributes_list = [ - 'tag_name', 'option', - 'value', 'option-value', - 'id', 'option-id' - ] + attributes_list = ['tag_name', 'option', 'value', 'option-value', 'id', 'option-id'] return WebElement( object_id='option-object-id', connection_handler=mock_connection_handler, @@ -107,11 +105,7 @@ def option_element(mock_connection_handler): @pytest.fixture def disabled_element(mock_connection_handler): """Disabled element fixture for testing enabled/disabled state.""" - attributes_list = [ - 'id', 'disabled-id', - 'tag_name', 'button', - 'disabled', 'true' - ] + attributes_list = ['id', 'disabled-id', 'tag_name', 'button', 'disabled', 'true'] return WebElement( object_id='disabled-object-id', connection_handler=mock_connection_handler, @@ -121,6 +115,30 @@ def disabled_element(mock_connection_handler): ) +@pytest.fixture +def ci_chrome_options(): + """Chrome options optimized for CI environments.""" + options = Options() + options.headless = 
True + options.start_timeout = 30 + + # CI-specific arguments + options.add_argument('--no-sandbox') + options.add_argument('--disable-dev-shm-usage') + options.add_argument('--disable-gpu') + options.add_argument('--disable-extensions') + options.add_argument('--disable-background-timer-throttling') + options.add_argument('--disable-backgrounding-occluded-windows') + options.add_argument('--disable-renderer-backgrounding') + options.add_argument('--disable-default-apps') + + # Memory optimization + options.add_argument('--memory-pressure-off') + options.add_argument('--max_old_space_size=4096') + + return options + + class TestWebElementInitialization: """Test WebElement initialization and basic properties.""" @@ -134,15 +152,13 @@ def test_web_element_initialization(self, web_element): 'class_name': 'test-class', 'value': 'test-value', 'tag_name': 'div', - 'type': 'text' + 'type': 'text', } def test_web_element_initialization_empty_attributes(self, mock_connection_handler): """Test WebElement initialization with empty attributes list.""" element = WebElement( - object_id='empty-id', - connection_handler=mock_connection_handler, - attributes_list=[] + object_id='empty-id', connection_handler=mock_connection_handler, attributes_list=[] ) assert element._attributes == {} assert element._search_method is None @@ -151,13 +167,13 @@ def test_web_element_initialization_empty_attributes(self, mock_connection_handl def test_web_element_initialization_odd_attributes(self, mock_connection_handler): """Test WebElement initialization with odd number of attributes (causes IndexError).""" attributes_list = ['id', 'test-id', 'class'] # Missing value for 'class' - + # This should raise IndexError because _def_attributes doesn't handle odd lists with pytest.raises(IndexError): WebElement( object_id='odd-id', connection_handler=mock_connection_handler, - attributes_list=attributes_list + attributes_list=attributes_list, ) def test_class_attribute_renamed_to_class_name(self, 
mock_connection_handler): @@ -166,7 +182,7 @@ def test_class_attribute_renamed_to_class_name(self, mock_connection_handler): element = WebElement( object_id='class-test', connection_handler=mock_connection_handler, - attributes_list=attributes_list + attributes_list=attributes_list, ) assert 'class' not in element._attributes assert element._attributes['class_name'] == 'my-class' @@ -193,7 +209,7 @@ def test_properties_with_none_values(self, mock_connection_handler): element = WebElement( object_id='empty-element', connection_handler=mock_connection_handler, - attributes_list=[] + attributes_list=[], ) assert element.value is None assert element.class_name is None @@ -277,7 +293,7 @@ async def test_insert_text(self, input_element): """Test insert_text method.""" test_text = 'Hello World' await input_element.insert_text(test_text) - + input_element._connection_handler.execute_command.assert_called_once() @pytest.mark.asyncio @@ -291,7 +307,7 @@ async def test_type_text(self, input_element): # Should call execute_command for each character assert input_element._connection_handler.execute_command.call_count == len(test_text) assert input_element.click.call_count == 1 - + # Verify sleep was called between characters assert mock_sleep.call_count == len(test_text) mock_sleep.assert_called_with(0.05) @@ -310,18 +326,12 @@ async def test_type_text_default_interval(self, input_element): @pytest.mark.asyncio async def test_get_parent_element_success(self, web_element): """Test successful parent element retrieval.""" - script_response = { - 'result': { - 'result': { - 'objectId': 'parent-object-id' - } - } - } + script_response = {'result': {'result': {'objectId': 'parent-object-id'}}} describe_response = { 'result': { 'node': { 'nodeName': 'DIV', - 'attributes': ['id', 'parent-container', 'class', 'container'] + 'attributes': ['id', 'parent-container', 'class', 'container'], } } } @@ -329,7 +339,7 @@ async def test_get_parent_element_success(self, web_element): 
script_response, # Script execution describe_response, # Describe node ] - + parent_element = await web_element.get_parent_element() assert isinstance(parent_element, WebElement) @@ -337,18 +347,14 @@ async def test_get_parent_element_success(self, web_element): assert parent_element._attributes == { 'id': 'parent-container', 'class_name': 'container', - 'tag_name': 'div' + 'tag_name': 'div', } web_element._connection_handler.execute_command.assert_called() @pytest.mark.asyncio async def test_get_parent_element_not_found(self, web_element): """Test parent element not found raises ElementNotFound.""" - script_response = { - 'result': { - 'result': {} # No objectId - } - } + script_response = {'result': {'result': {}}} # No objectId web_element._connection_handler.execute_command.return_value = script_response @@ -358,33 +364,31 @@ async def test_get_parent_element_not_found(self, web_element): @pytest.mark.asyncio async def test_get_parent_element_with_complex_attributes(self, web_element): """Test parent element with complex attribute list.""" - script_response = { - 'result': { - 'result': { - 'objectId': 'complex-parent-id' - } - } - } + script_response = {'result': {'result': {'objectId': 'complex-parent-id'}}} describe_response = { 'result': { 'node': { 'nodeName': 'SECTION', 'attributes': [ - 'id', 'main-section', - 'class', 'content-wrapper', - 'data-testid', 'parent-element', - 'aria-label', 'Main content area' - ] + 'id', + 'main-section', + 'class', + 'content-wrapper', + 'data-testid', + 'parent-element', + 'aria-label', + 'Main content area', + ], } } } - + web_element._connection_handler.execute_command.side_effect = [ script_response, describe_response, ] - + parent_element = await web_element.get_parent_element() assert isinstance(parent_element, WebElement) @@ -394,42 +398,28 @@ async def test_get_parent_element_with_complex_attributes(self, web_element): 'class_name': 'content-wrapper', 'data-testid': 'parent-element', 'aria-label': 'Main content 
area', - 'tag_name': 'section' + 'tag_name': 'section', } @pytest.mark.asyncio async def test_get_parent_element_root_element(self, web_element): """Test getting parent of root element (should return document body).""" - script_response = { - 'result': { - 'result': { - 'objectId': 'body-object-id' - } - } - } - + script_response = {'result': {'result': {'objectId': 'body-object-id'}}} + describe_response = { - 'result': { - 'node': { - 'nodeName': 'BODY', - 'attributes': ['class', 'page-body'] - } - } + 'result': {'node': {'nodeName': 'BODY', 'attributes': ['class', 'page-body']}} } - + web_element._connection_handler.execute_command.side_effect = [ script_response, describe_response, ] - + parent_element = await web_element.get_parent_element() assert isinstance(parent_element, WebElement) assert parent_element._object_id == 'body-object-id' - assert parent_element._attributes == { - 'class_name': 'page-body', - 'tag_name': 'body' - } + assert parent_element._attributes == {'class_name': 'page-body', 'tag_name': 'body'} class TestWebElementKeyboardInteraction: @@ -440,27 +430,27 @@ async def test_key_down(self, web_element): """Test key_down method.""" key = Key.ENTER modifiers = KeyModifier.CTRL - + await web_element.key_down(key, modifiers) - + web_element._connection_handler.execute_command.assert_called_once() @pytest.mark.asyncio async def test_key_down_without_modifiers(self, web_element): """Test key_down without modifiers.""" key = Key.TAB - + await web_element.key_down(key) - + web_element._connection_handler.execute_command.assert_called_once() @pytest.mark.asyncio async def test_key_up(self, web_element): """Test key_up method.""" key = Key.ESCAPE - + await web_element.key_up(key) - + web_element._connection_handler.execute_command.assert_called_once() @pytest.mark.asyncio @@ -468,10 +458,10 @@ async def test_press_keyboard_key(self, web_element): """Test press_keyboard_key method (key down + up).""" key = Key.SPACE modifiers = KeyModifier.SHIFT - + 
with patch('asyncio.sleep') as mock_sleep: await web_element.press_keyboard_key(key, modifiers, interval=0.05) - + # Should call key_down and key_up assert web_element._connection_handler.execute_command.call_count == 2 mock_sleep.assert_called_once_with(0.05) @@ -480,10 +470,10 @@ async def test_press_keyboard_key(self, web_element): async def test_press_keyboard_key_default_interval(self, web_element): """Test press_keyboard_key with default interval.""" key = Key.ENTER - + with patch('asyncio.sleep') as mock_sleep: await web_element.press_keyboard_key(key) - + mock_sleep.assert_called_once_with(0.1) @@ -496,12 +486,10 @@ async def test_click_using_js_success(self, web_element): # Mock element visibility and click success web_element.is_visible = AsyncMock(return_value=True) web_element.scroll_into_view = AsyncMock() - web_element.execute_script = AsyncMock( - return_value={'result': {'result': {'value': True}}} - ) - + web_element.execute_script = AsyncMock(return_value={'result': {'result': {'value': True}}}) + await web_element.click_using_js() - + web_element.scroll_into_view.assert_called_once() web_element.is_visible.assert_called_once() @@ -530,9 +518,9 @@ async def test_click_using_js_not_interactable(self, web_element): async def test_click_using_js_option_element(self, option_element): """Test JavaScript click on option element uses specialized method.""" option_element._click_option_tag = AsyncMock() - + await option_element.click_using_js() - + option_element._click_option_tag.assert_called_once() @pytest.mark.asyncio @@ -546,10 +534,10 @@ async def test_click_success(self, web_element): None, # mouse press None, # mouse release ] - + with patch('asyncio.sleep') as mock_sleep: await web_element.click(x_offset=5, y_offset=10, hold_time=0.2) - + # Should call mouse press and release assert web_element._connection_handler.execute_command.call_count == 3 mock_sleep.assert_called_once_with(0.2) @@ -566,9 +554,9 @@ async def test_click_not_visible(self, 
web_element): async def test_click_option_element(self, option_element): """Test click on option element uses specialized method.""" option_element._click_option_tag = AsyncMock() - + await option_element.click() - + option_element._click_option_tag.assert_called_once() @pytest.mark.asyncio @@ -576,7 +564,7 @@ async def test_click_bounds_fallback_to_js(self, web_element): """Test click falls back to JS bounds when CDP bounds fail.""" web_element.is_visible = AsyncMock(return_value=True) web_element.scroll_into_view = AsyncMock() - + # First call (bounds) raises KeyError, second call (JS bounds) succeeds js_bounds = {'x': 10, 'y': 20, 'width': 100, 'height': 50} web_element._connection_handler.execute_command.side_effect = [ @@ -585,9 +573,9 @@ async def test_click_bounds_fallback_to_js(self, web_element): None, # mouse press None, # mouse release ] - + await web_element.click() - + # Should call bounds, JS bounds, mouse press, and mouse release assert web_element._connection_handler.execute_command.call_count == 4 @@ -595,7 +583,7 @@ async def test_click_bounds_fallback_to_js(self, web_element): async def test_click_option_tag_method(self, option_element): """Test _click_option_tag method.""" await option_element._click_option_tag() - + # Should execute script with option value option_element._connection_handler.execute_command.assert_called_once() @@ -607,16 +595,16 @@ class TestWebElementFileInput: async def test_set_input_files_success(self, file_input_element): """Test successful file input setting.""" files = ['/path/to/file1.txt', '/path/to/file2.pdf'] - + await file_input_element.set_input_files(files) - + file_input_element._connection_handler.execute_command.assert_called_once() @pytest.mark.asyncio async def test_set_input_files_not_file_input(self, web_element): """Test set_input_files on non-file input element.""" files = ['/path/to/file.txt'] - + with pytest.raises(ElementNotAFileInput): await web_element.set_input_files(files) @@ -624,7 +612,7 @@ 
async def test_set_input_files_not_file_input(self, web_element): async def test_set_input_files_input_but_wrong_type(self, input_element): """Test set_input_files on input element with wrong type.""" files = ['/path/to/file.txt'] - + with pytest.raises(ElementNotAFileInput): await input_element.set_input_files(files) @@ -637,22 +625,22 @@ async def test_take_screenshot_success(self, web_element, tmp_path): """Test successful element screenshot.""" bounds = {'x': 10, 'y': 20, 'width': 100, 'height': 50} screenshot_data = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/wcAAgAB/edzE+oAAAAASUVORK5CYII=' - + web_element._connection_handler.execute_command.side_effect = [ {'result': {'result': {'value': json.dumps(bounds)}}}, # get_bounds_using_js {'result': {'data': screenshot_data}}, # capture_screenshot ] - + screenshot_path = tmp_path / 'element.jpg' - + # Mock aiofiles.open properly for async context manager mock_file = AsyncMock() mock_file.write = AsyncMock() - + with patch('aiofiles.open') as mock_aiofiles_open: mock_aiofiles_open.return_value.__aenter__.return_value = mock_file await web_element.take_screenshot(str(screenshot_path), quality=90) - + # Should call get_bounds_using_js and capture_screenshot assert web_element._connection_handler.execute_command.call_count == 2 @@ -665,17 +653,17 @@ async def test_take_screenshot_default_quality(self, web_element, tmp_path): {'result': {'result': {'value': json.dumps(bounds)}}}, {'result': {'data': screenshot_data}}, ] - + screenshot_path = tmp_path / 'element_default.jpg' - + # Mock aiofiles.open properly for async context manager mock_file = AsyncMock() mock_file.write = AsyncMock() - + with patch('aiofiles.open') as mock_aiofiles_open: mock_aiofiles_open.return_value.__aenter__.return_value = mock_file await web_element.take_screenshot(str(screenshot_path)) - + # Should call get_bounds_using_js and capture_screenshot assert web_element._connection_handler.execute_command.call_count == 2 @@ -686,10 
+674,8 @@ class TestWebElementVisibility: @pytest.mark.asyncio async def test_is_element_visible_true(self, web_element): """Test _is_element_visible returns True.""" - web_element.execute_script = AsyncMock( - return_value={'result': {'result': {'value': True}}} - ) - + web_element.execute_script = AsyncMock(return_value={'result': {'result': {'value': True}}}) + result = await web_element.is_visible() assert result is True @@ -699,17 +685,15 @@ async def test_is_element_visible_false(self, web_element): web_element.execute_script = AsyncMock( return_value={'result': {'result': {'value': False}}} ) - + result = await web_element.is_visible() assert result is False @pytest.mark.asyncio async def test_is_element_on_top_true(self, web_element): """Test _is_element_on_top returns True.""" - web_element.execute_script = AsyncMock( - return_value={'result': {'result': {'value': True}}} - ) - + web_element.execute_script = AsyncMock(return_value={'result': {'result': {'value': True}}}) + result = await web_element.is_on_top() assert result is True @@ -726,9 +710,7 @@ async def test_is_element_on_top_false(self, web_element): @pytest.mark.asyncio async def test_is_element_interactable_true(self, web_element): """Test _is_element_interactable returns True.""" - web_element.execute_script = AsyncMock( - return_value={'result': {'result': {'value': True}}} - ) + web_element.execute_script = AsyncMock(return_value={'result': {'result': {'value': True}}}) result = await web_element.is_interactable() assert result is True @@ -752,8 +734,7 @@ async def test_wait_until_visible_success(self, web_element): """Test wait_until succeeds when element becomes visible.""" web_element.is_visible = AsyncMock(side_effect=[False, True]) - with patch('asyncio.sleep') as mock_sleep, \ - patch('asyncio.get_event_loop') as mock_loop: + with patch('asyncio.sleep') as mock_sleep, patch('asyncio.get_event_loop') as mock_loop: mock_loop.return_value.time.side_effect = [0, 0.5] await 
web_element.wait_until(is_visible=True, timeout=2) @@ -766,13 +747,10 @@ async def test_wait_until_visible_timeout(self, web_element): """Test wait_until raises WaitElementTimeout when visibility not met.""" web_element.is_visible = AsyncMock(return_value=False) - with patch('asyncio.sleep') as mock_sleep, \ - patch('asyncio.get_event_loop') as mock_loop: + with patch('asyncio.sleep') as mock_sleep, patch('asyncio.get_event_loop') as mock_loop: mock_loop.return_value.time.side_effect = [0, 0.5, 1.0, 1.5, 2.1] - with pytest.raises( - WaitElementTimeout, match='element to become visible' - ): + with pytest.raises(WaitElementTimeout, match='element to become visible'): await web_element.wait_until(is_visible=True, timeout=2) assert mock_sleep.call_count == 3 @@ -791,13 +769,10 @@ async def test_wait_until_interactable_timeout(self, web_element): """Test wait_until raises WaitElementTimeout when not interactable.""" web_element.is_interactable = AsyncMock(return_value=False) - with patch('asyncio.sleep') as mock_sleep, \ - patch('asyncio.get_event_loop') as mock_loop: + with patch('asyncio.sleep') as mock_sleep, patch('asyncio.get_event_loop') as mock_loop: mock_loop.return_value.time.side_effect = [0, 0.5, 1.1] - with pytest.raises( - WaitElementTimeout, match='element to become interactable' - ): + with pytest.raises(WaitElementTimeout, match='element to become interactable'): await web_element.wait_until(is_interactable=True, timeout=1) mock_sleep.assert_called_once_with(0.5) @@ -808,13 +783,10 @@ async def test_wait_until_visible_and_interactable(self, web_element): web_element.is_visible = AsyncMock(side_effect=[False, True]) web_element.is_interactable = AsyncMock(side_effect=[False, True]) - with patch('asyncio.sleep') as mock_sleep, \ - patch('asyncio.get_event_loop') as mock_loop: + with patch('asyncio.sleep') as mock_sleep, patch('asyncio.get_event_loop') as mock_loop: mock_loop.return_value.time.side_effect = [0, 0.5, 1.0] - await web_element.wait_until( - 
is_visible=True, is_interactable=True, timeout=2 - ) + await web_element.wait_until(is_visible=True, is_interactable=True, timeout=2) assert web_element.is_visible.call_count == 2 assert web_element.is_interactable.call_count == 2 @@ -857,9 +829,7 @@ def test_is_option_tag_false(self, web_element): def test_def_attributes_empty_list(self, mock_connection_handler): """Test _def_attributes with empty list.""" element = WebElement( - object_id='test', - connection_handler=mock_connection_handler, - attributes_list=[] + object_id='test', connection_handler=mock_connection_handler, attributes_list=[] ) assert element._attributes == {} @@ -869,7 +839,7 @@ def test_def_attributes_class_rename(self, mock_connection_handler): element = WebElement( object_id='test', connection_handler=mock_connection_handler, - attributes_list=attributes_list + attributes_list=attributes_list, ) assert element._attributes == {'class_name': 'my-class', 'id': 'my-id'} @@ -879,16 +849,18 @@ async def test_execute_script(self, web_element): script = 'return this.tagName;' expected_response = {'result': {'result': {'value': 'DIV'}}} web_element._connection_handler.execute_command.return_value = expected_response - + result = await web_element.execute_script(script, return_by_value=True) - + assert result == expected_response expected_command = RuntimeCommands.call_function_on( object_id='test-object-id', function_declaration=script, return_by_value=True, ) - web_element._connection_handler.execute_command.assert_called_once_with(expected_command, timeout=60) + web_element._connection_handler.execute_command.assert_called_once_with( + expected_command, timeout=60 + ) def test_repr(self, web_element): """Test __repr__ method.""" @@ -907,18 +879,16 @@ async def test_find_element_success(self, web_element): """Test successful element finding.""" node_response = {'result': {'result': {'objectId': 'found-element-id'}}} describe_response = { - 'result': { - 'node': {'nodeName': 'BUTTON', 'attributes': 
['class', 'btn']} - } + 'result': {'node': {'nodeName': 'BUTTON', 'attributes': ['class', 'btn']}} } - + web_element._connection_handler.execute_command.side_effect = [ node_response, describe_response, ] - + element = await web_element.find(id='button-id') - + assert isinstance(element, WebElement) assert element._object_id == 'found-element-id' assert element._attributes['class_name'] == 'btn' @@ -926,20 +896,16 @@ async def test_find_element_success(self, web_element): @pytest.mark.asyncio async def test_find_element_not_found_with_exception(self, web_element): """Test element not found raises exception.""" - web_element._connection_handler.execute_command.return_value = { - 'result': {'result': {}} - } - + web_element._connection_handler.execute_command.return_value = {'result': {'result': {}}} + with pytest.raises(ElementNotFound): await web_element.find(id='nonexistent') @pytest.mark.asyncio async def test_find_element_not_found_no_exception(self, web_element): """Test element not found returns None when raise_exc=False.""" - web_element._connection_handler.execute_command.return_value = { - 'result': {'result': {}} - } - + web_element._connection_handler.execute_command.return_value = {'result': {'result': {}}} + result = await web_element.find(id='nonexistent', raise_exc=False) assert result is None @@ -956,20 +922,18 @@ async def test_find_elements_success(self, web_element): } } describe_response = { - 'result': { - 'node': {'nodeName': 'LI', 'attributes': ['class', 'item']} - } + 'result': {'node': {'nodeName': 'LI', 'attributes': ['class', 'item']}} } - + web_element._connection_handler.execute_command.side_effect = [ find_response, properties_response, describe_response, describe_response, ] - + elements = await web_element.find(class_name='item', find_all=True) - + assert len(elements) == 2 assert all(isinstance(elem, WebElement) for elem in elements) assert elements[0]._object_id == 'child-1' @@ -979,22 +943,18 @@ async def 
test_find_elements_success(self, web_element): async def test_find_with_timeout_success(self, web_element): """Test find with timeout succeeds on retry.""" node_response = {'result': {'result': {'objectId': 'delayed-element'}}} - describe_response = { - 'result': { - 'node': {'nodeName': 'DIV', 'attributes': []} - } - } - + describe_response = {'result': {'node': {'nodeName': 'DIV', 'attributes': []}}} + # First call returns empty, second call succeeds web_element._connection_handler.execute_command.side_effect = [ {'result': {'result': {}}}, # First attempt fails node_response, # Second attempt succeeds describe_response, ] - + with patch('asyncio.sleep') as mock_sleep: element = await web_element.find(id='delayed', timeout=2) - + assert isinstance(element, WebElement) assert element._object_id == 'delayed-element' mock_sleep.assert_called() @@ -1002,13 +962,17 @@ async def test_find_with_timeout_success(self, web_element): @pytest.mark.asyncio async def test_find_with_timeout_failure(self, web_element): """Test find with timeout raises WaitElementTimeout.""" - web_element._connection_handler.execute_command.return_value = { - 'result': {'result': {}} - } - + web_element._connection_handler.execute_command.return_value = {'result': {'result': {}}} + with patch('asyncio.get_event_loop') as mock_loop: - mock_loop.return_value.time.side_effect = [0, 0.5, 1.0, 1.5, 2.1] # Simulate time progression - + mock_loop.return_value.time.side_effect = [ + 0, + 0.5, + 1.0, + 1.5, + 2.1, + ] # Simulate time progression + with pytest.raises(WaitElementTimeout): await web_element.find(id='never-appears', timeout=2) @@ -1017,18 +981,16 @@ async def test_query_css_selector(self, web_element): """Test query method with CSS selector.""" node_response = {'result': {'result': {'objectId': 'queried-element'}}} describe_response = { - 'result': { - 'node': {'nodeName': 'A', 'attributes': ['href', 'http://example.com']} - } + 'result': {'node': {'nodeName': 'A', 'attributes': ['href', 
'http://example.com']}} } - + web_element._connection_handler.execute_command.side_effect = [ node_response, describe_response, ] - + element = await web_element.query('a[href*="example"]') - + assert isinstance(element, WebElement) assert element._object_id == 'queried-element' @@ -1036,25 +998,23 @@ async def test_query_css_selector(self, web_element): async def test_query_xpath(self, web_element): """Test query method with XPath expression.""" node_response = {'result': {'result': {'objectId': 'xpath-element'}}} - describe_response = { - 'result': { - 'node': {'nodeName': 'SPAN', 'attributes': []} - } - } - + describe_response = {'result': {'node': {'nodeName': 'SPAN', 'attributes': []}}} + web_element._connection_handler.execute_command.side_effect = [ node_response, describe_response, ] - + element = await web_element.query('//span[text()="Click me"]') - + assert isinstance(element, WebElement) assert element._object_id == 'xpath-element' def test_find_no_criteria_raises_error(self, web_element): """Test find with no search criteria raises ValueError.""" - with pytest.raises(ValueError, match='At least one of the following arguments must be provided'): + with pytest.raises( + ValueError, match='At least one of the following arguments must be provided' + ): asyncio.run(web_element.find()) @@ -1065,7 +1025,7 @@ class TestWebElementEdgeCases: async def test_bounds_property_with_connection_error(self, web_element): """Test bounds property when connection fails.""" web_element._connection_handler.execute_command.side_effect = Exception("Connection failed") - + with pytest.raises(Exception, match="Connection failed"): await web_element.bounds @@ -1076,7 +1036,7 @@ async def test_text_property_with_malformed_html(self, web_element): web_element._connection_handler.execute_command.return_value = { 'result': {'outerHTML': malformed_html} } - + # BeautifulSoup should handle malformed HTML gracefully text = await web_element.text assert 'Unclosed tag' in text @@ -1093,10 
+1053,10 @@ async def test_click_with_zero_hold_time(self, web_element): None, # mouse press None, # mouse release ] - + with patch('asyncio.sleep') as mock_sleep: await web_element.click(hold_time=0) - + mock_sleep.assert_called_once_with(0) @pytest.mark.asyncio @@ -1113,9 +1073,317 @@ async def test_type_text_empty_string(self, input_element): async def test_set_input_files_empty_list(self, file_input_element): """Test set_input_files with empty file list.""" await file_input_element.set_input_files([]) - + expected_command = DomCommands.set_file_input_files( - files=[], - object_id='file-input-object-id' + files=[], object_id='file-input-object-id' + ) + file_input_element._connection_handler.execute_command.assert_called_once_with( + expected_command, timeout=60 ) - file_input_element._connection_handler.execute_command.assert_called_once_with(expected_command, timeout=60) + + +class TestWebElementGetChildren: + """Integration tests for WebElement get_children_elements method using real HTML.""" + + @pytest.mark.asyncio + async def test_get_children_elements_basic(self, ci_chrome_options): + """Test get_children_elements with basic depth using real HTML.""" + + # Get the path to our test HTML file + test_file = Path(__file__).parent / 'pages' / 'test_children.html' + file_url = f'file://{test_file.absolute()}' + + async with Chrome(options=ci_chrome_options) as browser: + tab = await browser.start() + await tab.go_to(file_url) + + # Find the parent element + parent_element = await tab.find(id='parent-element') + + # Test get_children_elements with depth 3 + nodes = await parent_element.get_children_elements(3) + + # Verify results - should get all direct children and nested children up to depth 3 + assert len(nodes) > 0 + assert all(isinstance(node, WebElement) for node in nodes) + + # Check that we have the expected direct children + child_ids = [] + for node in nodes: + node_id = node.get_attribute('id') + if node_id: + child_ids.append(node_id) + + # Should 
include direct children + expected_direct_children = [ + 'child1', + 'child2', + 'child3', + 'link1', + 'link2', + 'nested-parent', + ] + for expected_id in expected_direct_children: + assert ( + expected_id in child_ids + ), f"Expected child {expected_id} not found in {child_ids}" + + # Should also include nested children (depth 3) + expected_nested_children = ['nested-child1', 'nested-child2', 'nested-link'] + for expected_id in expected_nested_children: + assert ( + expected_id in child_ids + ), f"Expected nested child {expected_id} not found in {child_ids}" + + @pytest.mark.asyncio + async def test_get_children_elements_with_tag_filter(self, ci_chrome_options): + """Test get_children_elements with tag filter using real HTML.""" + + # Get the path to our test HTML file + test_file = Path(__file__).parent / 'pages' / 'test_children.html' + file_url = f'file://{test_file.absolute()}' + + async with Chrome(options=ci_chrome_options) as browser: + tab = await browser.start() + await tab.go_to(file_url) + + # Find the parent element + parent_element = await tab.find(id='parent-element') + + # Test get_children_elements with tag filter for 'a' tags + nodes_filter = await parent_element.get_children_elements(4, ['a']) + + # Verify results - should only get anchor tags + assert len(nodes_filter) > 0 + assert all(isinstance(node, WebElement) for node in nodes_filter) + + # Check that all returned elements are anchor tags + for node in nodes_filter: + tag_name = node.get_attribute('tag_name') + assert tag_name.lower() == 'a', f"Expected 'a' tag, got '{tag_name}'" + + # Check that we have the expected anchor elements + link_ids = [] + for node in nodes_filter: + node_id = node.get_attribute('id') + if node_id: + link_ids.append(node_id) + + # Should include both direct and nested anchor tags + expected_links = ['link1', 'link2', 'nested-link'] + for expected_id in expected_links: + assert ( + expected_id in link_ids + ), f"Expected link {expected_id} not found in 
{link_ids}" + + @pytest.mark.asyncio + async def test_get_children_elements_depth_limit(self, ci_chrome_options): + """Test get_children_elements with depth limit.""" + + # Get the path to our test HTML file + test_file = Path(__file__).parent / 'pages' / 'test_children.html' + file_url = f'file://{test_file.absolute()}' + + async with Chrome(options=ci_chrome_options) as browser: + tab = await browser.start() + await tab.go_to(file_url) + + # Find the parent element + parent_element = await tab.find(id='parent-element') + + # Test with depth 1 - should only get direct children + nodes_depth_1 = await parent_element.get_children_elements(1) + + # Get IDs of elements found with depth 1 + depth_1_ids = [] + for node in nodes_depth_1: + node_id = node.get_attribute('id') + if node_id: + depth_1_ids.append(node_id) + + # Should include direct children but not nested ones + expected_direct = ['child1', 'child2', 'child3', 'link1', 'link2', 'nested-parent'] + for expected_id in expected_direct: + assert expected_id in depth_1_ids, f"Expected direct child {expected_id} not found" + + # Should NOT include nested children with depth 1 + unexpected_nested = ['nested-child1', 'nested-child2', 'nested-link'] + for unexpected_id in unexpected_nested: + assert ( + unexpected_id not in depth_1_ids + ), f"Unexpected nested child {unexpected_id} found with depth 1" + + @pytest.mark.asyncio + async def test_get_children_elements_empty_result(self, ci_chrome_options): + """Test get_children_elements on element with no children.""" + + # Get the path to our test HTML file + test_file = Path(__file__).parent / 'pages' / 'test_children.html' + file_url = f'file://{test_file.absolute()}' + + async with Chrome(options=ci_chrome_options) as browser: + tab = await browser.start() + await tab.go_to(file_url) + + # Find a leaf element (no children) + leaf_element = await tab.find(id='child1') + + # Test get_children_elements on element with no children + nodes = await 
leaf_element.get_children_elements(2) + + # Should return empty list + assert isinstance(nodes, list) + assert len(nodes) == 0 + + @pytest.mark.asyncio + async def test_get_children_elements_element_not_found_exception(self): + """Test get_children_elements raises ElementNotFound when script fails.""" + # Create a mock element that will fail the script execution + mock_connection_handler = AsyncMock() + + # Mock script result without objectId (simulates script failure) + mock_connection_handler.execute_command.return_value = { + 'result': {'result': {}} # No objectId key + } + + # Create a WebElement with the mock connection + element = WebElement( + object_id='test-element-id', + connection_handler=mock_connection_handler, + attributes_list=['id', 'test-element', 'tag_name', 'div'], + ) + + # Should raise ElementNotFound when script returns no objectId + with pytest.raises(ElementNotFound): + await element.get_children_elements(1, raise_exc=True) + + @pytest.mark.asyncio + async def test_get_siblings_elements_basic(self, ci_chrome_options): + """Test get_siblings_elements with basic functionality using real HTML.""" + + # Get the path to our test HTML file + test_file = Path(__file__).parent / 'pages' / 'test_children.html' + file_url = f'file://{test_file.absolute()}' + + async with Chrome(options=ci_chrome_options) as browser: + tab = await browser.start() + await tab.go_to(file_url) + + # Find one of the child elements to get its siblings + child_element = await tab.find(id='child2') + + # Test get_siblings_elements + siblings = await child_element.get_siblings_elements() + + # Verify results - should get all sibling elements + assert len(siblings) > 0 + assert all(isinstance(sibling, WebElement) for sibling in siblings) + + # Check that we have the expected siblings + sibling_ids = [] + for sibling in siblings: + sibling_id = sibling.get_attribute('id') + if sibling_id: + sibling_ids.append(sibling_id) + + # Should include all siblings of child2 (child1, 
child3, link1, link2, nested-parent) + # but NOT child2 itself + expected_siblings = ['child1', 'child3', 'link1', 'link2', 'nested-parent'] + for expected_id in expected_siblings: + assert ( + expected_id in sibling_ids + ), f"Expected sibling {expected_id} not found in {sibling_ids}" + + # Should NOT include the element itself + assert 'child2' not in sibling_ids, "Element should not include itself in siblings" + + @pytest.mark.asyncio + async def test_get_siblings_elements_with_tag_filter(self, ci_chrome_options): + """Test get_siblings_elements with tag filter.""" + + # Get the path to our test HTML file + test_file = Path(__file__).parent / 'pages' / 'test_children.html' + file_url = f'file://{test_file.absolute()}' + + async with Chrome(options=ci_chrome_options) as browser: + tab = await browser.start() + await tab.go_to(file_url) + + # Find one of the child elements to get its siblings + child_element = await tab.find(id='child1') + + # Test get_siblings_elements with tag filter for 'a' tags only + siblings_filter = await child_element.get_siblings_elements(tag_filter=['a']) + + # Get IDs of filtered siblings + sibling_ids = [] + for sibling in siblings_filter: + sibling_id = sibling.get_attribute('id') + if sibling_id: + sibling_ids.append(sibling_id) + + # Should include only anchor tag siblings + expected_links = ['link1', 'link2'] + for expected_id in expected_links: + assert ( + expected_id in sibling_ids + ), f"Expected link sibling {expected_id} not found in {sibling_ids}" + + # Should NOT include non-anchor siblings + unexpected_siblings = ['child2', 'child3', 'nested-parent'] + for unexpected_id in unexpected_siblings: + assert ( + unexpected_id not in sibling_ids + ), f"Unexpected non-anchor sibling {unexpected_id} found with tag filter" + + @pytest.mark.asyncio + async def test_get_siblings_elements_empty_result(self, ci_chrome_options): + """Test get_siblings_elements on element with no siblings.""" + + # Get the path to our test HTML file + 
test_file = Path(__file__).parent / 'pages' / 'test_children.html' + file_url = f'file://{test_file.absolute()}' + + async with Chrome(options=ci_chrome_options) as browser: + tab = await browser.start() + await tab.go_to(file_url) + + # Find the parent element which should have no siblings at its level + parent_element = await tab.find(id='parent-element') + + # Test get_siblings_elements on element with no siblings + siblings = await parent_element.get_siblings_elements() + + # Should return list with only the other parent element as sibling + assert isinstance(siblings, list) + # Should have at least one sibling (another-parent) + sibling_ids = [] + for sibling in siblings: + sibling_id = sibling.get_attribute('id') + if sibling_id: + sibling_ids.append(sibling_id) + + # Should include the other parent element + assert 'another-parent' in sibling_ids + + @pytest.mark.asyncio + async def test_get_siblings_elements_element_not_found_exception(self): + """Test get_siblings_elements raises ElementNotFound when script fails.""" + # Create a mock element that will fail the script execution + mock_connection_handler = AsyncMock() + + # Mock script result without objectId (simulates script failure) + mock_connection_handler.execute_command.return_value = { + 'result': {'result': {}} # No objectId key + } + + # Create a WebElement with the mock connection + element = WebElement( + object_id='test-element-id', + connection_handler=mock_connection_handler, + attributes_list=['id', 'test-element', 'tag_name', 'div'], + ) + + # Should raise ElementNotFound when script returns no objectId + with pytest.raises(ElementNotFound): + await element.get_siblings_elements(raise_exc=True) From 44911ecf11aaa9aa6bab3350e4c81bc72f33ec33 Mon Sep 17 00:00:00 2001 From: Thalison Fernandes Date: Fri, 3 Oct 2025 15:29:31 -0300 Subject: [PATCH 2/3] test: add tests for TopLevelTargetRequired --- tests/test_browser/test_browser_tab.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git 
a/tests/test_browser/test_browser_tab.py b/tests/test_browser/test_browser_tab.py index 515e499a..d443b085 100644 --- a/tests/test_browser/test_browser_tab.py +++ b/tests/test_browser/test_browser_tab.py @@ -23,6 +23,7 @@ WaitElementTimeout, NetworkEventsNotEnabled, InvalidScriptWithElement, + TopLevelTargetRequired, ) @pytest_asyncio.fixture @@ -424,6 +425,14 @@ async def test_take_screenshot_beyond_viewport(self, tab): assert command['params']['captureBeyondViewport'] is True assert result == screenshot_data + @pytest.mark.asyncio + async def test_take_screenshot_in_iframe_raises_top_level_required(self, tab): + """Tab.take_screenshot must be called on top-level targets; iframe Tab raises.""" + # Simulate CDP returning no image data (missing 'data' key) for non top-level target + with patch.object(tab, '_execute_command', AsyncMock(return_value={'result': {}})): + with pytest.raises(TopLevelTargetRequired): + await tab.take_screenshot(path=None, as_base64=True) + @pytest.mark.asyncio async def test_print_to_pdf_to_file(self, tab, tmp_path): """Test printing to PDF and saving to file.""" From d70e3694f22a02dce8108337b677f06103589ed3 Mon Sep 17 00:00:00 2001 From: Thalison Fernandes Date: Fri, 3 Oct 2025 15:29:49 -0300 Subject: [PATCH 3/3] docs: update docs to include details about take_screenshot method --- public/docs/deep-dive/tab-domain.md | 15 +++++++++++++++ public/docs/features.md | 3 +++ public/docs/zh/deep-dive/tab-domain.md | 15 +++++++++++++++ public/docs/zh/features.md | 3 +++ 4 files changed, 36 insertions(+) diff --git a/public/docs/deep-dive/tab-domain.md b/public/docs/deep-dive/tab-domain.md index 1b304b38..630a30af 100644 --- a/public/docs/deep-dive/tab-domain.md +++ b/public/docs/deep-dive/tab-domain.md @@ -368,6 +368,21 @@ These visual capture capabilities are invaluable for: - Debugging automation scripts - Archiving page content +!!! 
warning "Top-level targets vs iFrames for Tab screenshots" + `Tab.take_screenshot()` relies on CDP's `Page.captureScreenshot`, which only works for top-level targets. If you obtained a `Tab` for an iframe using `await tab.get_frame(iframe_element)`, calling `take_screenshot()` on that iframe tab will raise `TopLevelTargetRequired`. + + Use `WebElement.take_screenshot()` inside iframes. It captures via the viewport and works within the iframe context. + + ```python + # Wrong: iframe Tab screenshot (raises TopLevelTargetRequired) + iframe_tab = await tab.get_frame(iframe_element) + await iframe_tab.take_screenshot(as_base64=True) # will raise an exception + + # Correct: element screenshot inside iframe (uses viewport) + element = await iframe_tab.find(id='captcha') + await element.take_screenshot('captcha.png') # will work! + ``` + ## Event System Overview The Tab domain provides a comprehensive event system for monitoring and reacting to browser events: diff --git a/public/docs/features.md b/public/docs/features.md index 0e6497b2..acac8183 100644 --- a/public/docs/features.md +++ b/public/docs/features.md @@ -57,6 +57,9 @@ Capture visual content from web pages: - **High-Quality PDF Export**: Generate PDF documents from web pages - **Custom Formatting**: Coming soon! +!!! note "Screenshots in iFrames and top-level targets" + `tab.take_screenshot()` only works on top-level targets. When working inside an `iframe` (using `await tab.get_frame(iframe_element)`), Chrome's `Page.captureScreenshot` cannot capture the subtarget directly. In these scenarios, use `WebElement.take_screenshot()` instead—it captures via the viewport and works inside iframes. 
+ ## Remote Connections and Hybrid Automation ### Connect to a running browser via WebSocket diff --git a/public/docs/zh/deep-dive/tab-domain.md b/public/docs/zh/deep-dive/tab-domain.md index f552f78d..c338e937 100644 --- a/public/docs/zh/deep-dive/tab-domain.md +++ b/public/docs/zh/deep-dive/tab-domain.md @@ -363,6 +363,21 @@ await tab.print_to_pdf( - 调试自动化脚本 - 存档页面内容 +!!! warning "顶层目标与 iframe 的截图差异" + `Tab.take_screenshot()` 依赖 CDP 的 `Page.captureScreenshot`,该能力仅适用于顶层目标(top-level target)。如果通过 `await tab.get_frame(iframe_element)` 获取了 iframe 对应的 `Tab`,在此 `Tab` 上调用 `take_screenshot()` 会抛出 `TopLevelTargetRequired`。 + + 在 iframe 内请使用 `WebElement.take_screenshot()`。它基于视口(viewport)进行捕获,适用于 iframe 场景。 + + ```python + # 错误:在 iframe Tab 上截图(会抛出 TopLevelTargetRequired) + iframe_tab = await tab.get_frame(iframe_element) + await iframe_tab.take_screenshot(as_base64=True) # 会抛出异常 + + # 正确:在 iframe 内对元素截图(基于视口) + element = await iframe_tab.find(id='captcha') + await element.take_screenshot('captcha.png') # 会正常工作! + ``` + ## 事件系统概述 Tab 域提供了一个全面的事件系统,用于监控和响应浏览器事件: diff --git a/public/docs/zh/features.md b/public/docs/zh/features.md index 0d2d6f59..3653f2f1 100644 --- a/public/docs/zh/features.md +++ b/public/docs/zh/features.md @@ -59,6 +59,9 @@ Pydoll支持操作任何Chromium核心的浏览器: - **高质量 PDF 导出**:从网页生成 PDF 文档 - **自定义格式**:即将推出! +!!! note "关于 iframe 与顶层目标的截图" + `tab.take_screenshot()` 仅适用于顶层目标(top-level target)。在 `iframe` 内(通过 `await tab.get_frame(iframe_element)` 获取的子目标)时,Chrome 的 `Page.captureScreenshot` 无法直接对该子目标截图。这种情况下请改用 `WebElement.take_screenshot()`,它基于视口(viewport)进行捕获,适用于 iframe 内部。 + ## 远程连接与混合自动化 ### 通过 WebSocket 连接已运行的浏览器