diff --git a/pydoll/browser/base.py b/pydoll/browser/base.py index a971f3a3..58673e78 100644 --- a/pydoll/browser/base.py +++ b/pydoll/browser/base.py @@ -60,14 +60,36 @@ def __init__( self._pages = [] async def __aenter__(self): + """ + Async context manager entry point. + + Returns: + Browser: The browser instance. + """ return self async def __aexit__(self, exc_type, exc_val, exc_tb): + """ + Async context manager exit point. + + Args: + exc_type: The exception type, if raised. + exc_val: The exception value, if raised. + exc_tb: The traceback, if an exception was raised. + """ await self.stop() await self._connection_handler.close() async def start(self) -> None: - """Método principal para iniciar o navegador.""" + """ + Main method to start the browser. + + This method initializes the browser process and configures + all necessary settings to create a working browser instance. + + Returns: + None + """ binary_location = ( self.options.binary_location or self._get_default_binary_location() ) @@ -110,6 +132,9 @@ async def get_page(self) -> Page: """ Retrieves a Page instance for an existing page in the browser. If no pages are open, a new page will be created. + + Returns: + Page: A Page instance connected to an existing or new browser page. """ page_id = ( await self.new_page() if not self._pages else self._pages.pop() @@ -155,7 +180,9 @@ async def on( Args: event_name (str): Name of the event to listen for. - callback (Callable): function to be called when the event occurs. + callback (callable): Function to be called when the event occurs. + temporary (bool): If True, the callback will be removed after it's + triggered once. Defaults to False. Returns: int: The ID of the registered callback. @@ -176,8 +203,12 @@ async def new_page(self, url: str = ''): """ Opens a new page in the browser. + Args: + url (str): Optional initial URL to navigate to. + Defaults to empty string. + Returns: - Page: The new page instance. + str: The ID of the new page. 
""" response = await self._execute_command( TargetCommands.create_target(url) @@ -424,17 +455,40 @@ async def _continue_request_auth_required( await self.disable_fetch_events() async def _init_first_page(self): + """ + Initializes the first page in the browser. + + This method obtains the first valid page from available targets + and stores its ID for later use. + + Returns: + None + """ pages = await self.get_targets() valid_page = await self._get_valid_page(pages) self._pages.append(valid_page) async def _verify_browser_running(self): - """Verifica se o navegador está rodando.""" + """ + Verifies if the browser is running. + + Raises: + BrowserNotRunning: If the browser failed to start. + """ if not await self._is_browser_running(): raise exceptions.BrowserNotRunning('Failed to start browser') async def _configure_proxy(self, private_proxy, proxy_credentials): - """Configura o proxy, se necessário.""" + """ + Configures proxy settings if needed. + + Args: + private_proxy: Boolean indicating if a private proxy is enabled. + proxy_credentials: Tuple containing proxy username and password. + + Returns: + None + """ if private_proxy: await self.enable_fetch_events(handle_auth_requests=True) await self.on( @@ -454,17 +508,28 @@ async def _configure_proxy(self, private_proxy, proxy_credentials): @staticmethod def _is_valid_page(page: dict) -> bool: - """Verifica se uma página é uma nova aba válida.""" + """ + Verifies if a page is a valid new tab. + + Args: + page (dict): Dictionary containing page information. + + Returns: + bool: True if the page is a valid new tab, False otherwise. + """ return page.get('type') == 'page' and 'chrome://newtab/' in page.get( 'url', '' ) async def _get_valid_page(self, pages) -> str: """ - Obtém o ID de uma página válida ou cria uma nova. + Gets the ID of a valid page or creates a new one. + + Args: + pages (list): List of page dictionaries to check for validity. 
Returns: - str: targetId da página existente ou nova + str: The target ID of an existing or new page. """ valid_page = next( (page for page in pages if self._is_valid_page(page)), None @@ -507,7 +572,15 @@ async def _execute_command(self, command: str): ) def _setup_user_dir(self): - """Prepara o diretório de dados do usuário, se necessário.""" + """ + Prepares the user data directory if needed. + + This method creates a temporary directory for browser data if + no user directory is specified in the browser options. + + Returns: + None + """ temp_dir = self._temp_directory_manager.create_temp_dir() if '--user-data-dir' not in [ arg.split('=')[0] for arg in self.options.arguments diff --git a/pydoll/browser/chrome.py b/pydoll/browser/chrome.py index 3f552f70..7425329b 100644 --- a/pydoll/browser/chrome.py +++ b/pydoll/browser/chrome.py @@ -6,13 +6,42 @@ class Chrome(Browser): + """ + A class that implements the Chrome browser functionality. + + This class provides specific implementation for launching and + controlling Google Chrome browsers. + """ + def __init__( self, options: Options | None = None, connection_port: int = 9222 ): + """ + Initializes the Chrome browser instance. + + Args: + options (Options | None): An instance of Options class to configure + the browser. If None, default options will be used. + connection_port (int): The port to connect to the browser. + Defaults to 9222. + """ super().__init__(options, connection_port) @staticmethod def _get_default_binary_location(): + """ + Gets the default location of the Chrome browser executable. + + This method determines the default Chrome executable path based + on the operating system. + + Returns: + str: The path to the Chrome browser executable. + + Raises: + ValueError: If the operating system is not supported or + the browser executable is not found at the default location. 
+ """ os_name = platform.system() browser_paths = { 'Windows': diff --git a/pydoll/browser/managers.py b/pydoll/browser/managers.py index c430d96b..27377cfb 100644 --- a/pydoll/browser/managers.py +++ b/pydoll/browser/managers.py @@ -9,14 +9,30 @@ class ProxyManager: def __init__(self, options): + """ + Initializes the ProxyManager with browser options. + + This manager handles proxy configuration for the browser, + including extraction and management of proxy credentials. + + Args: + options: The browser options instance containing arguments. + """ self.options = options def get_proxy_credentials(self) -> tuple[bool, tuple[str, str]]: """ - Configura as configurações de proxy e extrai credenciais se presentes. + Configures proxy settings and extracts credentials if present. + + This method searches for proxy settings in the browser options, + extracts any credentials, and updates the proxy arguments to use + a clean proxy URL without embedded credentials. Returns: - tuple[bool, tuple[str, str]]: (private_proxy, (username, password)) + tuple[bool, tuple[str, str]]: A tuple containing: + - bool: True if private proxy with credentials was found + - tuple[str, str]: Username and password for proxy + authentication """ private_proxy = False credentials = (None, None) @@ -37,7 +53,16 @@ def get_proxy_credentials(self) -> tuple[bool, tuple[str, str]]: return private_proxy, credentials def _find_proxy_argument(self) -> tuple[int, str] | None: - """Encontra o primeiro argumento --proxy-server válido""" + """ + Finds the first valid --proxy-server argument in browser options. + + This method iterates through the browser arguments looking for + a proxy server configuration. + + Returns: + tuple[int, str] | None: A tuple containing the index of the + argument and the proxy value if found, None otherwise. 
+ """ for index, arg in enumerate(self.options.arguments): if arg.startswith('--proxy-server='): return index, arg.split('=', 1)[1] @@ -45,7 +70,23 @@ def _find_proxy_argument(self) -> tuple[int, str] | None: @staticmethod def _parse_proxy(proxy_value: str) -> tuple[bool, str, str, str]: - """Extrai credenciais e limpa o valor do proxy""" + """ + Extracts credentials from proxy value and cleans the proxy string. + + This method parses a proxy URL to extract embedded credentials + (if present) in the format username:password@server. + + Args: + proxy_value (str): The proxy URL potentially containing + credentials. + + Returns: + tuple[bool, str, str, str]: A tuple containing: + - bool: True if credentials were found + - str: Username (or None if no credentials) + - str: Password (or None if no credentials) + - str: Clean proxy URL without credentials + """ if '@' not in proxy_value: return False, None, None, proxy_value @@ -57,12 +98,34 @@ def _parse_proxy(proxy_value: str) -> tuple[bool, str, str, str]: return False, None, None, proxy_value def _update_proxy_argument(self, index: int, clean_proxy: str) -> None: - """Atualiza a lista de argumentos com proxy limpo""" + """ + Updates the options arguments list with the clean proxy URL. + + This method replaces the original proxy argument (which may have + contained credentials) with a clean version that doesn't expose + sensitive data. + + Args: + index (int): The index of the proxy argument to update. + clean_proxy (str): The proxy URL without credentials. + + Returns: + None + """ self.options.arguments[index] = f'--proxy-server={clean_proxy}' class BrowserProcessManager: def __init__(self, process_creator=None): + """ + Initializes the BrowserProcessManager. + + This manager handles the creation and management of browser processes. + + Args: + process_creator (callable, optional): A function that creates a + browser process. If None, the default process creator is used. 
+ """ self._process_creator = ( process_creator or self._default_process_creator ) @@ -71,7 +134,21 @@ def __init__(self, process_creator=None): def start_browser_process( self, binary_location: str, port: int, arguments: list ) -> None: - """Inicia o processo do navegador""" + """ + Starts the browser process with the given parameters. + + This method launches a new browser process with the specified binary, + debugging port, and command-line arguments. + + Args: + binary_location (str): Path to the browser executable. + port (int): The remote debugging port to use. + arguments (list): Additional command-line arguments for the + browser. + + Returns: + subprocess.Popen: The started browser process. + """ self._process = self._process_creator([ binary_location, f'--remote-debugging-port={port}', @@ -81,34 +158,76 @@ def start_browser_process( @staticmethod def _default_process_creator(command: list[str]): + """ + Default function to create a browser process. + + This method creates a subprocess with the given command-line arguments. + + Args: + command (list[str]): The command and arguments to start the + process. + + Returns: + subprocess.Popen: The created process instance. + """ return subprocess.Popen( command, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) def stop_process(self): - """Para o processo do navegador se estiver em execução""" + """ + Stops the browser process if it's running. + + This method terminates the browser process that was previously + started with start_browser_process. + + Returns: + None + """ if self._process: self._process.terminate() class TempDirectoryManager: def __init__(self, temp_dir_factory=TemporaryDirectory): + """ + Initializes the TempDirectoryManager. + + This manager handles the creation and cleanup of temporary directories + used by browser instances. + + Args: + temp_dir_factory (callable, optional): A function that creates + temporary directories. Defaults to TemporaryDirectory. 
+ """ self._temp_dir_factory = temp_dir_factory self._temp_dirs = [] def create_temp_dir(self): """ - Cria um diretório temporário para a instância do navegador. + Creates a temporary directory for a browser instance. + + This method creates a new temporary directory and tracks it + for later cleanup. Returns: - TemporaryDirectory: O diretório temporário. + TemporaryDirectory: The created temporary directory instance. """ temp_dir = self._temp_dir_factory() self._temp_dirs.append(temp_dir) return temp_dir def cleanup(self): - """Limpa todos os diretórios temporários""" + """ + Cleans up all temporary directories created by this manager. + + This method removes all temporary directories created with + create_temp_dir, suppressing any OS errors that might occur + during deletion. + + Returns: + None + """ for temp_dir in self._temp_dirs: with suppress(OSError): shutil.rmtree(temp_dir.name) @@ -118,13 +237,19 @@ class BrowserOptionsManager: @staticmethod def initialize_options(options: Options | None) -> Options: """ - Inicializa as opções para o navegador. + Initializes options for the browser. + + This method ensures that a valid Options instance is available, + creating a default one if necessary. Args: - options (Options | None): Uma instância da classe Options ou None. + options (Options | None): An Options instance or None. Returns: - Options: A instância de opções inicializada. + Options: An initialized Options instance. + + Raises: + ValueError: If options is not None and not an instance of Options. """ if options is None: return Options() @@ -134,20 +259,37 @@ def initialize_options(options: Options | None) -> Options: @staticmethod def add_default_arguments(options: Options): - """Adiciona argumentos padrão aos argumentos fornecidos""" + """ + Adds default arguments to the provided options. + + This method appends standard browser arguments that improve + reliability and automation performance. + + Args: + options (Options): The options instance to modify. 
+ + Returns: + None + """ options.arguments.append('--no-first-run') options.arguments.append('--no-default-browser-check') @staticmethod def validate_browser_path(path: str) -> str: """ - Valida o caminho fornecido do navegador. + Validates the provided browser executable path. + + This method checks if the browser executable file exists at + the specified path. Args: - path (str): O caminho do arquivo executável do navegador. + path (str): The path to the browser executable. Returns: - str: O caminho do navegador validado. + str: The validated browser path if it exists. + + Raises: + ValueError: If the browser executable is not found at the path. """ if not os.path.exists(path): raise ValueError(f'Browser not found: {path}') diff --git a/pydoll/browser/page.py b/pydoll/browser/page.py index 42812b7e..6ef2b610 100644 --- a/pydoll/browser/page.py +++ b/pydoll/browser/page.py @@ -21,8 +21,8 @@ def __init__(self, connection_port: int, page_id: str): Initializes the Page instance. Args: - connection_handler (ConnectionHandler): The connection handler - instance. + connection_port (int): The port number for the connection to the + browser. page_id (str): The ID of the page, obtained via the DevTools Protocol. """ @@ -102,6 +102,11 @@ async def page_source(self) -> str: async def close(self): """ Closes the page. + + This method closes the current page in the browser. + + Returns: + None """ await self._execute_command(PageCommands.close()) @@ -110,7 +115,8 @@ async def get_cookies(self) -> list[dict]: Retrieves the cookies of the page. Returns: - list: A list of cookies. + list[dict]: A list of dictionaries containing cookie data from + the current page. """ response = await self._execute_command( NetworkCommands.get_all_cookies() @@ -122,13 +128,20 @@ async def set_cookies(self, cookies: list[dict]): Sets cookies for the page. Args: - cookies (list): A list of cookies to set. 
+ cookies (list[dict]): A list of dictionaries containing cookie + data to set for the current page. """ await self._execute_command(NetworkCommands.set_cookies(cookies)) async def delete_all_cookies(self): """ Deletes all cookies from the browser. + + This clears both storage cookies and browser cookies associated with + the current page. + + Returns: + None """ await self._execute_command(StorageCommands.clear_cookies()) await self._execute_command(NetworkCommands.clear_browser_cookies()) @@ -172,6 +185,12 @@ async def go_to(self, url: str, timeout=300): Args: url (str): The URL to navigate to. + timeout (int): Maximum time in seconds to wait for page to load. + Defaults to 300 seconds. + + Raises: + TimeoutError: If the page fails to load within the specified + timeout. """ if await self._refresh_if_url_not_changed(url): return @@ -186,6 +205,16 @@ async def go_to(self, url: str, timeout=300): async def refresh(self): """ Refreshes the page. + + This method reloads the current page and waits for it to finish + loading. + + Raises: + TimeoutError: If the page does not finish loading within the + default timeout period (300 seconds). + + Returns: + None """ await self._execute_command(PageCommands.refresh()) try: @@ -199,6 +228,9 @@ async def get_screenshot(self, path: str): Args: path (str): The file path to save the screenshot to. + + Returns: + None """ response = await self._execute_command(PageCommands.screenshot()) screenshot_b64 = response['result']['data'].encode('utf-8') @@ -243,10 +275,14 @@ async def get_network_logs(self, matches: list[str] = []): Retrieves network logs from the page. Args: - matches (str): The URL pattern to match network logs against. + matches (list[str]): A list of URL patterns to match network logs + against. If empty, all logs are returned. Returns: - list: A list of network logs that match the specified pattern. + list: A list of network logs that match the specified patterns. 
+ + Raises: + LookupError: If no network logs match the specified patterns. """ network_logs = self._connection_handler.network_logs logs_matched = [] @@ -296,7 +332,9 @@ async def get_network_response_body(self, request_id: str): request_id (str): The ID of the network request. Returns: - str: The response body of the network request. + tuple: A tuple containing: + - str: The response body content + - bool: Flag indicating if the body is base64 encoded """ response = await self._execute_command( NetworkCommands.get_response_body(request_id) @@ -309,6 +347,13 @@ async def get_network_response_body(self, request_id: str): async def enable_page_events(self): """ Enables page events for the page. + + This allows listening for page-related events such as load, navigate, + and content change events. These events can be captured with the `on` + method. + + Returns: + None """ await self._execute_command(PageCommands.enable_page()) self._page_events_enabled = True @@ -316,6 +361,12 @@ async def enable_page_events(self): async def enable_network_events(self): """ Enables network events for the page. + + This allows listening for network-related events such as request and + response events. These events can be captured with the `on` method. + + Returns: + None """ await self._execute_command(NetworkCommands.enable_network_events()) self._network_events_enabled = True @@ -325,6 +376,17 @@ async def enable_fetch_events( ): """ Enables fetch events for the page. + + This allows interception of network requests before they are sent. + + Args: + handle_auth (bool): Whether to handle authentication requests. + Defaults to False. + resource_type (str): The type of resource to intercept. + Defaults to 'Document'. + + Returns: + None """ await self._execute_command( FetchCommands.enable_fetch_events(handle_auth, resource_type) @@ -334,6 +396,13 @@ async def enable_fetch_events( async def enable_dom_events(self): """ Enables DOM events for the page. 
+ + This allows listening for DOM-related events such as node creation, + attribute modification, and node removal events. These events can be + captured with the `on` method. + + Returns: + None """ await self._execute_command(DomCommands.enable_dom_events()) self._dom_events_enabled = True @@ -341,6 +410,12 @@ async def enable_dom_events(self): async def disable_fetch_events(self): """ Disables fetch events for the page. + + This stops the interception of network requests that was previously + enabled with enable_fetch_events(). + + Returns: + None """ await self._execute_command(FetchCommands.disable_fetch_events()) self._fetch_events_enabled = False @@ -348,6 +423,12 @@ async def disable_fetch_events(self): async def disable_page_events(self): """ Disables page events for the page. + + This stops listening for page-related events that were previously + enabled with enable_page_events(). + + Returns: + None """ await self._execute_command(PageCommands.disable_page()) self._page_events_enabled = False @@ -359,10 +440,15 @@ async def on( Registers an event listener for the page. Args: - event (str): The event to listen for. + event_name (str): The event name to listen for. callback (callable): The callback function to execute when the event is triggered. - temporary (bool): Whether the event listener is temporary or not. + temporary (bool): If True, the callback will be removed after it's + triggered once. Defaults to False. + + Returns: + int: The ID of the registered callback, which can be used to + remove the listener later. """ async def callback_wrapper(event): @@ -392,6 +478,12 @@ async def execute_script(self, script: str, element: WebElement = None): Args: script (str): The JavaScript script to execute. + element (WebElement, optional): The element to execute the script + on. Use 'argument' in your script to refer to this element. + Defaults to None. + + Returns: + dict: The result of the script execution from the browser. 
""" if element: script = script.replace('argument', 'this') @@ -420,6 +512,14 @@ async def _refresh_if_url_not_changed(self, url: str): async def _wait_page_load(self, timeout: int = 300): """ Waits for the page to finish loading. + + Args: + timeout (int): Maximum time in seconds to wait for the page + to load. Defaults to 300 seconds. + + Raises: + asyncio.TimeoutError: If the page does not finish loading within + the specified timeout. """ start_time = asyncio.get_event_loop().time() while True: diff --git a/pydoll/commands/dom.py b/pydoll/commands/dom.py index ec60cf61..9351439d 100644 --- a/pydoll/commands/dom.py +++ b/pydoll/commands/dom.py @@ -7,14 +7,16 @@ class DomCommands: """ - A class to define commands for interacting with the Document - Object Model (DOM) using the Chrome DevTools Protocol (CDP). - The commands allow for various operations on DOM nodes, - such as enabling the DOM domain, retrieving the - DOM document, describing nodes, and querying elements. + A class for interacting with the Document Object Model (DOM) using the + Chrome DevTools Protocol. + + This class provides methods to interact with DOM nodes through CDP + commands, including enabling the DOM domain, retrieving document + structure, querying elements, and manipulating DOM nodes. Attributes: - SelectorType (Literal): A type definition for supported selector types. + SelectorType (Literal): Supported selector types for finding elements + in the DOM. """ SelectorType = Literal[ @@ -43,14 +45,30 @@ class DomCommands: @classmethod def scroll_into_view(cls, object_id: str) -> dict: - """Generates the command to scroll a specific DOM node into view.""" + """ + Generates a command to scroll a specific DOM node into view. + + Args: + object_id (str): The object ID of the DOM node to scroll into view. + + Returns: + dict: The CDP command to scroll the node into view. 
+ """ command = copy.deepcopy(cls.SCROLL_INTO_VIEW_IF_NEEDED) command['params']['objectId'] = object_id return command @classmethod def get_outer_html(cls, object_id: int) -> dict: - """Generates the command to get the outer HTML""" + """ + Generates a command to get the outer HTML of a DOM node. + + Args: + object_id (int): The object ID of the DOM node. + + Returns: + dict: The CDP command to retrieve the outer HTML. + """ command = copy.deepcopy(cls.GET_OUTER_HTML) command['params']['objectId'] = object_id return command @@ -58,21 +76,39 @@ def get_outer_html(cls, object_id: int) -> dict: @classmethod def dom_document(cls) -> dict: """ - Generates the command to get the root DOM node of the current page. + Generates a command to get the root DOM node of the current page. + + Returns: + dict: The CDP command to retrieve the DOM document. """ return cls.DOM_DOCUMENT @classmethod def request_node(cls, object_id: str) -> dict: - """Generates the command to request a specific DOM node by its object - ID.""" + """ + Generates a command to request a specific DOM node by its object ID. + + Args: + object_id (str): The object ID of the DOM node to request. + + Returns: + dict: The CDP command to request the node. + """ command = copy.deepcopy(cls.REQUEST_NODE_TEMPLATE) command['params']['objectId'] = object_id return command @classmethod def describe_node(cls, object_id: str) -> dict: - """Generates the command to describe a specific DOM node.""" + """ + Generates a command to describe a specific DOM node. + + Args: + object_id (str): The object ID of the DOM node to describe. + + Returns: + dict: The CDP command to describe the node. + """ command = copy.deepcopy(cls.DESCRIBE_NODE_TEMPLATE) command['params']['objectId'] = object_id return command @@ -80,7 +116,13 @@ def describe_node(cls, object_id: str) -> dict: @classmethod def box_model(cls, object_id: str) -> dict: """ - Generates the command to get the box model of a specific DOM node. 
+ Generates a command to get the box model of a specific DOM node. + + Args: + object_id (str): The object ID of the DOM node. + + Returns: + dict: The CDP command to retrieve the box model. """ command = copy.deepcopy(cls.BOX_MODEL_TEMPLATE) command['params']['objectId'] = object_id @@ -88,12 +130,22 @@ def box_model(cls, object_id: str) -> dict: @classmethod def enable_dom_events(cls) -> dict: - """Generates the command to enable the DOM domain.""" + """ + Generates a command to enable the DOM domain in CDP. + + Returns: + dict: The CDP command to enable the DOM domain. + """ return cls.ENABLE @classmethod def get_current_url(cls) -> dict: - """Generates the command to get the current URL of the page.""" + """ + Generates a command to get the current URL of the page. + + Returns: + dict: The CDP command to retrieve the current URL. + """ return RuntimeCommands.evaluate_script('window.location.href') @classmethod @@ -103,8 +155,21 @@ def find_element( value: str, object_id: str = '', ) -> dict: - """Generates a command to find a DOM element based on the specified - criteria.""" + """ + Generates a command to find a DOM element based on the specified + criteria. + + Args: + by (SelectorType): The selector strategy to use + (CSS_SELECTOR, XPATH, etc.). + value (str): The selector value to search for. + object_id (str, optional): The object ID of a node to + search within. If provided, the search is relative to + this node. Defaults to empty string. + + Returns: + dict: The CDP command to find the element. + """ escaped_value = value.replace('"', '\\"') match by: case By.CLASS_NAME: @@ -137,8 +202,21 @@ def find_elements( value: str, object_id: str = '', ) -> dict: - """Generates a command to find multiple DOM elements based on the - specified criteria.""" + """ + Generates a command to find multiple DOM elements based on the + specified criteria. + + Args: + by (SelectorType): The selector strategy to use + (CSS_SELECTOR, XPATH, etc.). 
+ value (str): The selector value to search for. + object_id (str, optional): The object ID of a node to + search within. If provided, the search is relative to + this node. Defaults to empty string. + + Returns: + dict: The CDP command to find the elements. + """ escaped_value = value.replace('"', '\\"') match by: case By.CLASS_NAME: @@ -166,7 +244,17 @@ def find_elements( @classmethod def _find_element_by_xpath(cls, xpath: str, object_id: str) -> dict: - """Creates a command to find a DOM element by XPath.""" + """ + Creates a command to find a DOM element by XPath. + + Args: + xpath (str): The XPath expression to evaluate. + object_id (str): The object ID of a node to search within. + If provided, the search is relative to this node. + + Returns: + dict: The CDP command to find the element using XPath. + """ escaped_value = xpath.replace('"', '\\"') if object_id: escaped_value = cls._ensure_relative_xpath(escaped_value) @@ -187,7 +275,17 @@ def _find_element_by_xpath(cls, xpath: str, object_id: str) -> dict: @classmethod def _find_elements_by_xpath(cls, xpath: str, object_id: str) -> dict: - """Creates a command to find multiple DOM elements by XPath.""" + """ + Creates a command to find multiple DOM elements by XPath. + + Args: + xpath (str): The XPath expression to evaluate. + object_id (str): The object ID of a node to search within. + If provided, the search is relative to this node. + + Returns: + dict: The CDP command to find multiple elements using XPath. + """ escaped_value = xpath.replace('"', '\\"') if object_id: escaped_value = cls._ensure_relative_xpath(escaped_value) @@ -208,5 +306,14 @@ def _find_elements_by_xpath(cls, xpath: str, object_id: str) -> dict: @staticmethod def _ensure_relative_xpath(xpath: str) -> str: - """Ensures that the XPath expression is relative.""" + """ + Ensures that the XPath expression is relative. + + Args: + xpath (str): The XPath expression to check and possibly modify. 
+ + Returns: + str: The XPath expression with a prepended dot if necessary + to make it relative. + """ return f'.{xpath}' if not xpath.startswith('.') else xpath diff --git a/pydoll/commands/network.py b/pydoll/commands/network.py index 70a571ec..87e96d6d 100644 --- a/pydoll/commands/network.py +++ b/pydoll/commands/network.py @@ -62,6 +62,12 @@ def clear_browser_cache(cls): This is useful when you want to ensure that your application retrieves the most up-to-date resources from the server instead of loading potentially stale data from the cache. + + Args: + None + + Returns: + dict: A command to clear the browser's cache. """ return cls.CLEAR_BROWSER_CACHE @@ -73,6 +79,12 @@ def clear_browser_cookies(cls): This can be beneficial for testing scenarios where you need to simulate a fresh user session without any previously stored cookies that might affect the application's behavior. + + Args: + None + + Returns: + dict: A command to clear all cookies in the browser. """ return cls.CLEAR_BROWSER_COOKIES @@ -106,6 +118,12 @@ def disable_network_events(cls): network events, which can be useful during specific operations where you don't want to be notified about every network request and response. + + Args: + None + + Returns: + dict: A command to disable network event notifications. """ return cls.DISABLE @@ -117,6 +135,12 @@ def enable_network_events(cls): This allows you to start receiving network-related events again after they have been disabled. It's essential to call this before you expect to receive network events. + + Args: + None + + Returns: + dict: A command to enable network event notifications. """ return cls.ENABLE @@ -279,6 +303,12 @@ def get_all_cookies(cls): This can be useful for diagnostics, testing, or ensuring that your application behaves as expected when accessing cookies. + + Args: + None + + Returns: + dict: A command to retrieve all cookies in the browser. 
""" return cls.GET_ALL_COOKIES diff --git a/pydoll/commands/runtime.py b/pydoll/commands/runtime.py index 7a0315b6..69e71f16 100644 --- a/pydoll/commands/runtime.py +++ b/pydoll/commands/runtime.py @@ -2,6 +2,21 @@ class RuntimeCommands: + """ + A class for interacting with the JavaScript runtime using Chrome + DevTools Protocol. + + This class provides methods to create commands for evaluating JavaScript + expressions, calling functions on JavaScript objects, and retrieving + object properties through CDP. + + Attributes: + EVALUATE_TEMPLATE (dict): Template for the Runtime.evaluate command. + CALL_FUNCTION_ON_TEMPLATE (dict): Template for the + Runtime.callFunctionOn command. + GET_PROPERTIES (dict): Template for the Runtime.getProperties command. + """ + EVALUATE_TEMPLATE = {'method': 'Runtime.evaluate', 'params': {}} CALL_FUNCTION_ON_TEMPLATE = { 'method': 'Runtime.callFunctionOn', @@ -14,7 +29,16 @@ class RuntimeCommands: @classmethod def get_properties(cls, object_id: str) -> dict: - """Generates the command to get the properties of a specific object.""" + """ + Generates a command to get the properties of a specific + JavaScript object. + + Args: + object_id (str): The object ID of the JavaScript object. + + Returns: + dict: The CDP command to retrieve the object's properties. + """ command = copy.deepcopy(cls.GET_PROPERTIES) command['params']['objectId'] = object_id command['params']['ownProperties'] = True @@ -27,7 +51,22 @@ def call_function_on( function_declaration: str, return_by_value: bool = False, ) -> dict: - """Generates the command to call a function on a specific object.""" + """ + Generates a command to call a function on a specific + JavaScript object. + + Args: + object_id (str): The object ID of the JavaScript object to call + the function on. + function_declaration (str): The JavaScript function to execute + on the object. + return_by_value (bool, optional): Whether to return the result by + value instead of as a remote object reference. 
Defaults to + False. + + Returns: + dict: The CDP command to call the function on the specified object. + """ command = copy.deepcopy(cls.CALL_FUNCTION_ON_TEMPLATE) command['params']['objectId'] = object_id command['params']['functionDeclaration'] = function_declaration @@ -36,7 +75,15 @@ def call_function_on( @classmethod def evaluate_script(cls, expression: str) -> dict: - """Generates the command to evaluate JavaScript code.""" + """ + Generates a command to evaluate JavaScript code in the browser context. + + Args: + expression (str): The JavaScript expression to evaluate. + + Returns: + dict: The CDP command to evaluate the JavaScript expression. + """ command = copy.deepcopy(cls.EVALUATE_TEMPLATE) command['params'] = { 'expression': expression, diff --git a/pydoll/commands/storage.py b/pydoll/commands/storage.py index a5521846..3543182e 100644 --- a/pydoll/commands/storage.py +++ b/pydoll/commands/storage.py @@ -1,18 +1,54 @@ class StorageCommands: + """ + A class for interacting with browser storage using + Chrome DevTools Protocol. + + This class provides methods to create commands for managing cookies + in the browser, including retrieving, setting, and clearing cookies + through CDP commands. + + Attributes: + CLEAR_COOKIES (dict): Template for the Storage.clearCookies command. + SET_COOKIES (dict): Template for the Storage.setCookies command. + GET_COOKIES (dict): Template for the Storage.getCookies command. + """ + CLEAR_COOKIES = {'method': 'Storage.clearCookies', 'params': {}} SET_COOKIES = {'method': 'Storage.setCookies', 'params': {}} GET_COOKIES = {'method': 'Storage.getCookies', 'params': {}} @classmethod def clear_cookies(cls) -> dict: + """ + Generates a command to clear all browser cookies. + + Returns: + dict: The CDP command to clear all cookies. + """ return cls.CLEAR_COOKIES @classmethod def set_cookies(cls, cookies: list) -> dict: + """ + Generates a command to set browser cookies. 
+ + Args: + cookies (list): A list of cookie objects to be set in the browser. + Each cookie object should follow the CDP cookie format. + + Returns: + dict: The CDP command to set the specified cookies. + """ set_cookies = cls.SET_COOKIES.copy() set_cookies['params']['cookies'] = cookies return set_cookies @classmethod def get_cookies(cls) -> dict: + """ + Generates a command to retrieve all browser cookies. + + Returns: + dict: The CDP command to get all cookies. + """ return cls.GET_COOKIES diff --git a/pydoll/commands/target.py b/pydoll/commands/target.py index 2547af20..31615a0f 100644 --- a/pydoll/commands/target.py +++ b/pydoll/commands/target.py @@ -1,4 +1,26 @@ class TargetCommands: + """ + A class for managing browser targets using Chrome DevTools Protocol. + + This class provides methods to create commands for interacting with + browser targets, including creating, activating, attaching to, and closing + targets through CDP commands. + + Attributes: + ACTIVATE_TARGET (dict): Template for the + Target.activateTarget command. + ATTACH_TO_TARGET (dict): Template for the + Target.attachToTarget command. + CLOSE_TARGET (dict): Template for the + Target.closeTarget command. + CREATE_TARGET (dict): Template for the + Target.createTarget command. + GET_TARGETS (dict): Template for the + Target.getTargets command. + GET_TARGET_INFO (dict): Template for the + Target.getTargetInfo command. + """ + ACTIVATE_TARGET = {'method': 'Target.activateTarget', 'params': {}} ATTACH_TO_TARGET = {'method': 'Target.attachToTarget', 'params': {}} CLOSE_TARGET = {'method': 'Target.closeTarget', 'params': {}} @@ -8,28 +30,72 @@ class TargetCommands: @classmethod def activate_target(cls, target_id: str) -> dict: + """ + Generates a command to activate a specific browser target. + + Args: + target_id (str): The ID of the target to activate. + + Returns: + dict: The CDP command to activate the target. 
+ """ activate_target = cls.ATTACH_TO_TARGET.copy() activate_target['params']['targetId'] = target_id return activate_target @classmethod def attach_to_target(cls, target_id: str) -> dict: + """ + Generates a command to attach to a specific browser target. + + Args: + target_id (str): The ID of the target to attach to. + + Returns: + dict: The CDP command to attach to the target. + """ attach_to_target = cls.ATTACH_TO_TARGET.copy() attach_to_target['params']['targetId'] = target_id return attach_to_target @classmethod def close_target(cls, target_id: str) -> dict: + """ + Generates a command to close a specific browser target. + + Args: + target_id (str): The ID of the target to close. + + Returns: + dict: The CDP command to close the target. + """ close_target = cls.CLOSE_TARGET.copy() close_target['params']['targetId'] = target_id return close_target @classmethod def create_target(cls, url: str) -> dict: + """ + Generates a command to create a new browser target with the + specified URL. + + Args: + url (str): The URL to navigate to in the new target. + + Returns: + dict: The CDP command to create a new target. + """ create_target = cls.CREATE_TARGET.copy() create_target['params']['url'] = url return create_target @classmethod def get_targets(cls) -> dict: + """ + Generates a command to retrieve information about all + available targets. + + Returns: + dict: The CDP command to get all targets. + """ return cls.GET_TARGETS diff --git a/pydoll/connection/connection.py b/pydoll/connection/connection.py index 0850413f..ee13f48e 100644 --- a/pydoll/connection/connection.py +++ b/pydoll/connection/connection.py @@ -33,9 +33,15 @@ def __init__( Args: connection_port (int): The port to connect to the browser. + page_id (str): The ID of the page to connect to. Use 'browser' + for browser-level connections. Defaults to 'browser'. + ws_address_resolver (Callable): Function to resolve WebSocket + address from port. Defaults to get_browser_ws_address. 
+ ws_connector (Callable): Function to establish WebSocket + connections. Defaults to websockets.connect. - Sets up the internal state including WebSocket addresses, - connection instance, event callbacks, and command ID. + Returns: + None """ self._connection_port = connection_port self._page_id = page_id @@ -48,10 +54,29 @@ def __init__( @property def network_logs(self): + """ + Gets all network logs captured by the connection. + + This property provides access to network request and response logs + that have been captured during the browser session. + + Returns: + list: A list of network log entries. + """ return self._events_handler.network_logs @property def dialog(self): + """ + Gets information about the current dialog in the page, if any. + + This property provides access to any active dialog (alert, confirm, + prompt) that might be present in the page. + + Returns: + dict or None: Dialog information if a dialog is present, + None otherwise. + """ return self._events_handler.dialog async def ping(self) -> bool: @@ -106,14 +131,46 @@ async def execute_command(self, command: dict, timeout: int = 10) -> dict: async def register_callback( self, event_name: str, callback: Callable, temporary: bool = False ): + """ + Registers a callback function for a specific event. + + Args: + event_name (str): The name of the event to listen for. + callback (Callable): The function to call when the event occurs. + temporary (bool): If True, the callback will be removed after it's + triggered once. Defaults to False. + + Returns: + int: The ID of the registered callback, which can be used to + remove the listener later. + """ return self._events_handler.register_callback( event_name, callback, temporary ) async def remove_callback(self, callback_id: int): + """ + Removes a registered event callback by its ID. + + Args: + callback_id (int): The ID of the callback to remove. + + Returns: + bool: True if the callback was successfully removed, + False otherwise. 
+ """ return self._events_handler.remove_callback(callback_id) async def clear_callbacks(self): + """ + Removes all registered event callbacks. + + This method clears all event listeners that have been registered with + the register_callback method. + + Returns: + None + """ return self._events_handler.clear_callbacks() async def close(self): @@ -127,12 +184,28 @@ async def close(self): logger.info('WebSocket connection closed.') async def _ensure_active_connection(self): - """Guarantee an active connection exists.""" + """ + Guarantees that an active connection exists before proceeding. + + This method checks if the WebSocket connection is established + and active. If not, it establishes a new connection. + + Returns: + None + """ if self._ws_connection is None or self._ws_connection.closed: await self._establish_new_connection() async def _establish_new_connection(self): - """Create fresh connection and start listening.""" + """ + Creates a fresh WebSocket connection and starts listening for events. + + This method resolves the appropriate WebSocket address, establishes + a new connection, and initiates an asynchronous task to receive events. + + Returns: + None + """ ws_address = await self._resolve_ws_address() logger.info(f'Connecting to {ws_address}') self._ws_connection = await self._ws_connector(ws_address) @@ -140,7 +213,15 @@ async def _establish_new_connection(self): logger.debug('WebSocket connection established') async def _resolve_ws_address(self): - """Determine correct WebSocket address.""" + """ + Determines the correct WebSocket address based on the page ID. + + This method resolves the WebSocket URL differently depending on whether + the connection is to the browser itself or a specific page. + + Returns: + str: The WebSocket URL to connect to. 
+ """ if 'browser' in self._page_id: return await self._ws_address_resolver(self._connection_port) return ( @@ -149,7 +230,15 @@ async def _resolve_ws_address(self): ) async def _handle_connection_loss(self): - """Clean up after connection loss.""" + """ + Cleans up resources after a WebSocket connection loss. + + This method closes the connection if it's still open, nullifies the + connection reference, and cancels any ongoing receive tasks. + + Returns: + None + """ if self._ws_connection and not self._ws_connection.closed: await self._ws_connection.close() self._ws_connection = None @@ -174,12 +263,32 @@ async def _receive_events(self): raise async def _incoming_messages(self): - """Generator that yields raw messages while connection is open""" + """ + Generator that yields raw messages from the WebSocket connection. + + This asynchronous generator continuously receives messages from the + WebSocket connection as long as it remains open. + + Yields: + str: The raw message string received from the WebSocket. + """ while not self._ws_connection.closed: yield await self._ws_connection.recv() async def _process_single_message(self, raw_message: str): - """Orchestrates processing of a single raw WebSocket message""" + """ + Orchestrates the processing of a single raw WebSocket message. + + This method parses the raw message string into a JSON object and + routes it to the appropriate handler based on whether it's a command + response or an event notification. + + Args: + raw_message (str): The raw message string to process. + + Returns: + None + """ message = self._parse_message(raw_message) if not message: return @@ -194,6 +303,12 @@ def _parse_message(raw_message: str) -> dict | None: """ Attempts to parse raw message string into JSON. Returns parsed dict or None if parsing fails. + + Args: + raw_message (str): The raw message string to parse. + + Returns: + dict | None: The parsed JSON object if successful, None otherwise. 
""" try: return json.loads(raw_message) @@ -203,30 +318,95 @@ def _parse_message(raw_message: str) -> dict | None: @staticmethod def _is_command_response(message: dict) -> bool: - """Determines if message is a response to a command""" + """ + Determines if a message is a response to a previously sent command. + + Command responses are identified by having an integer 'id' field, + which corresponds to the ID of the original command. + + Args: + message (dict): The message to check. + + Returns: + bool: True if the message is a command response, False otherwise. + """ return 'id' in message and isinstance(message['id'], int) async def _handle_command_message(self, message: dict): - """Processes messages that are command responses""" + """ + Processes messages that are responses to previously sent commands. + + This method resolves the future associated with the command ID, + allowing the calling code to continue execution with the response. + + Args: + message (dict): The command response message to process. + + Returns: + None + """ logger.debug(f'Processing command response: {message.get("id")}') self._command_manager.resolve_command( message['id'], json.dumps(message) ) async def _handle_event_message(self, message: dict): - """Processes messages that are spontaneous events""" + """ + Processes messages that are spontaneous event notifications. + + This method delegates event processing to the events handler, + which will invoke any registered callbacks for the event type. + + Args: + message (dict): The event message to process. + + Returns: + None + """ event_type = message.get('method', 'unknown-event') logger.debug(f'Processing {event_type} event') await self._events_handler.process_event(message) def __repr__(self): + """ + Returns a string representation of the ConnectionHandler for debugging. + + Returns: + str: A string representation of the ConnectionHandler. 
+ """ return f'ConnectionHandler(port={self._connection_port})' def __str__(self): + """ + Returns a user-friendly string representation of the ConnectionHandler. + + Returns: + str: A string representation of the ConnectionHandler. + """ return f'ConnectionHandler(port={self._connection_port})' async def __aenter__(self): + """ + Async context manager entry point. + + Returns: + ConnectionHandler: The ConnectionHandler instance. + """ return self async def __aexit__(self, exc_type, exc_val, exc_tb): + """ + Async context manager exit point. + + This method ensures the connection is properly closed when + exiting the context manager. + + Args: + exc_type: The exception type, if raised. + exc_val: The exception value, if raised. + exc_tb: The traceback, if an exception was raised. + + Returns: + None + """ await self.close() diff --git a/pydoll/connection/managers.py b/pydoll/connection/managers.py index efe0fba2..53446376 100644 --- a/pydoll/connection/managers.py +++ b/pydoll/connection/managers.py @@ -8,11 +8,42 @@ class CommandManager: + """ + Manages the lifecycle of commands sent to the browser. + + This class handles the creation of command futures, command ID generation, + and resolution of command responses. It maintains a mapping of command IDs + to their corresponding futures, allowing asynchronous command execution. + """ + def __init__(self): + """ + Initializes the CommandManager. + + Sets up internal state for tracking pending commands and + initializes the command ID counter. + + Returns: + None + """ self._pending_commands: dict[int, asyncio.Future] = {} self._id = 1 def create_command_future(self, command: dict) -> asyncio.Future: + """ + Creates a future for a command and assigns it a unique ID. + + This method assigns a unique ID to the command, creates a future + to track its completion, and stores the future in the pending + commands dictionary. + + Args: + command (dict): The command to prepare for execution. 
+ + Returns: + asyncio.Future: A future that will be resolved when the command + completes. + """ command['id'] = self._id future = asyncio.Future() self._pending_commands[self._id] = future @@ -20,16 +51,35 @@ def create_command_future(self, command: dict) -> asyncio.Future: return future def resolve_command(self, response_id: int, result: str): + """ + Resolves a pending command with its result. + + This method sets the result for the future associated with the + command ID and removes it from the pending commands dictionary. + + Args: + response_id (int): The ID of the command to resolve. + result (str): The result data for the command. + + Returns: + None + """ if response_id in self._pending_commands: self._pending_commands[response_id].set_result(result) del self._pending_commands[response_id] def remove_pending_command(self, command_id: int): """ - Remove um comando pendente sem resolvê-lo (útil para timeouts). + Removes a pending command without resolving it. + + This method is useful for handling timeouts or cancellations, + allowing cleanup of command futures that will never be resolved. Args: - command_id: ID do comando a ser removido + command_id (int): The ID of the command to remove. + + Returns: + None """ if command_id in self._pending_commands: del self._pending_commands[command_id] @@ -37,10 +87,24 @@ def remove_pending_command(self, command_id: int): class EventsHandler: """ - Gerencia registro de callbacks, processamento de eventos e logs de rede. + Manages event callbacks, event processing, and network logs. + + This class is responsible for registering event callbacks, triggering them + when events are received, and maintaining state related to events such as + network logs and dialog information. """ def __init__(self): + """ + Initializes the EventsHandler. + + Sets up internal state for tracking event callbacks, initializes + the callback ID counter, and creates empty collections for network + logs and dialog information. 
+ + Returns: + None + """ self._event_callbacks: Dict[int, dict] = {} self._callback_id = 0 self.network_logs = [] @@ -51,10 +115,23 @@ def register_callback( self, event_name: str, callback: Callable, temporary: bool = False ) -> int: """ - Registra um callback para um tipo específico de evento. + Registers a callback for a specific event type. + + This method associates a callback function with an event name, + allowing the function to be called whenever that event occurs. - Retorna: - int: ID do callback registrado + Args: + event_name (str): The name of the event to listen for. + callback (Callable): The function to call when the event occurs. + temporary (bool): If True, the callback will be removed after it's + triggered once. Defaults to False. + + Returns: + int: The ID of the registered callback, which can be used to + remove it later. + + Raises: + InvalidCallback: If the callback is not callable. """ if not callable(callback): logger.error('Callback must be a callable function.') @@ -72,7 +149,19 @@ def register_callback( return self._callback_id def remove_callback(self, callback_id: int) -> bool: - """Remove um callback pelo ID.""" + """ + Removes a callback by its ID. + + This method removes a previously registered callback from the + event handler, preventing it from being triggered in the future. + + Args: + callback_id (int): The ID of the callback to remove. + + Returns: + bool: True if the callback was successfully removed, False if + the callback ID was not found. + """ if callback_id not in self._event_callbacks: logger.warning(f'Callback ID {callback_id} not found') return False @@ -82,21 +171,35 @@ def remove_callback(self, callback_id: int) -> bool: return True def clear_callbacks(self): - """Reseta todos os callbacks registrados.""" + """ + Removes all registered callbacks. + + This method clears all event listeners that have been registered, + effectively resetting the event handler to its initial state. 
+ + Returns: + None + """ self._event_callbacks.clear() logger.info('All callbacks cleared') async def process_event(self, event_data: dict): """ - Processa um evento recebido e dispara os callbacks correspondentes. + Processes a received event and triggers corresponding callbacks. + + This method handles special events like network requests and dialogs, + updating internal state accordingly, and then triggers any callbacks + registered for the event type. Args: - event_data: Dados do evento no formato dicionário + event_data (dict): The event data in dictionary format. + + Returns: + None """ event_name = event_data.get('method') logger.debug(f'Processing event: {event_name}') - # Atualiza logs de rede se necessário if 'Network.requestWillBeSent' in event_name: self._update_network_logs(event_data) @@ -106,16 +209,40 @@ async def process_event(self, event_data: dict): if 'Page.javascriptDialogClosed' in event_name: self.dialog = {} - # Processa callbacks await self._trigger_callbacks(event_name, event_data) def _update_network_logs(self, event_data: dict): - """Mantém os logs de rede atualizados.""" + """ + Maintains the network logs collection. + + This method adds a new network event to the logs and ensures + the collection doesn't grow too large by limiting its size. + + Args: + event_data (dict): The network event data to add to the logs. + + Returns: + None + """ self.network_logs.append(event_data) self.network_logs = self.network_logs[-10000:] # Mantém tamanho máximo async def _trigger_callbacks(self, event_name: str, event_data: dict): - """Dispara todos os callbacks registrados para o evento.""" + """ + Triggers all registered callbacks for an event. + + This method iterates through all registered callbacks for the + specified event name and invokes them with the event data. + It also handles temporary callbacks by removing them after they're + triggered. + + Args: + event_name (str): The name of the event that occurred. 
+ event_data (dict): The data associated with the event. + + Returns: + None + """ callbacks_to_remove = [] for cb_id, cb_data in list(self._event_callbacks.items()): @@ -131,6 +258,5 @@ async def _trigger_callbacks(self, event_name: str, event_data: dict): if cb_data['temporary']: callbacks_to_remove.append(cb_id) - # Remove callbacks temporários após processamento for cb_id in callbacks_to_remove: self.remove_callback(cb_id) diff --git a/pydoll/element.py b/pydoll/element.py index a349228a..5ab99811 100644 --- a/pydoll/element.py +++ b/pydoll/element.py @@ -16,6 +16,14 @@ class WebElement(FindElementsMixin): + """ + Represents a DOM element in the browser. + + This class provides methods to interact with and retrieve information about + DOM elements. It allows operations such as clicking, sending keys, getting + attributes, and other common web element interactions. It inherits element + finding capabilities from FindElementsMixin. + """ def __init__( self, object_id: str, @@ -28,8 +36,15 @@ def __init__( Initializes the WebElement instance. Args: - node (dict): The node description from the browser. - connection_handler (ConnectionHandler): The connection instance. + object_id (str): The unique object ID for this DOM element. + connection_handler (ConnectionHandler): The connection instance to + communicate with the browser. + method (str, optional): The search method used to find this + element. Defaults to None. + selector (str, optional): The selector string used to find this + element. Defaults to None. + attributes_list (list, optional): List of attribute name-value + pairs. Defaults to an empty list. """ self._object_id = object_id self._search_method = method @@ -39,12 +54,36 @@ def __init__( self._def_attributes(attributes_list) def __repr__(self): + """ + Returns a string representation of the WebElement. + + The representation includes all attributes and the object ID, + making it useful for debugging and logging. 
+ + Returns: + str: String representation of the WebElement. + """ attrs = ', '.join(f'{k}={v!r}' for k, v in self._attributes.items()) return ( f'{self.__class__.__name__}({attrs})(object_id={self._object_id})' ) def _def_attributes(self, attributes_list: list): + """ + Defines element attributes from a flat list of key-value pairs. + + This method processes an attribute list from the browser and + populates the element's attributes dictionary. It handles the + special case of renaming 'class' to 'class_name' to avoid conflicts + with Python's reserved keywords. + + Args: + attributes_list (list): A flat list of alternating attribute names + and values. + + Returns: + None + """ for i in range(0, len(attributes_list), 2): key = attributes_list[i] key = key if key != 'class' else 'class_name' @@ -64,13 +103,10 @@ def value(self) -> str: @property def class_name(self) -> str: """ - Retrieves the class name of the - element. + Retrieves the class name of the element. Returns: - str: The class name of the - element. - + str: The class name of the element. """ return self._attributes.get('class_name') @@ -97,10 +133,13 @@ def is_enabled(self) -> bool: @property async def bounds(self) -> list: """ - Asynchronously retrieves the bounding box of the element. + Asynchronously retrieves the bounding box coordinates of the element. + + This property uses the DevTools Protocol to get detailed positioning + information of the element. Returns: - dict: The bounding box of the element. + list: A list of points defining the element's bounding box. """ command = DomCommands.box_model(object_id=self._object_id) response = await self._execute_command(command) @@ -134,10 +173,19 @@ async def _execute_script( self, script: str, return_by_value: bool = False ): """ - Executes a JavaScript script on the element. + Executes a JavaScript script in the context of this element. 
+ + This method allows executing JavaScript with the element as 'this', + enabling direct manipulation of the element using JavaScript. Args: script (str): The JavaScript script to execute. + return_by_value (bool): Whether to return the result by value. + If False, returns a reference. Defaults to False. + + Returns: + dict: The response from the browser containing the script execution + results. """ return await self._execute_command( RuntimeCommands.call_function_on( @@ -221,12 +269,32 @@ def get_attribute(self, name: str) -> str: async def scroll_into_view(self): """ - Scrolls the element into view. + Scrolls the element into the visible area of the browser window. + + This method ensures that the element is visible in the viewport + before performing actions like clicking or getting screenshots. + + Returns: + None """ command = DomCommands.scroll_into_view(object_id=self._object_id) await self._execute_command(command) async def click_using_js(self): + """ + Clicks on the element using JavaScript. + + This method uses JavaScript to trigger a click event on the element. + It's useful for elements that can't be clicked using normal mouse + events, or for elements that might be obscured. + + Raises: + ElementNotVisible: If the element is not visible on the page. + ElementNotInteractable: If the element could not be clicked. + + Returns: + None + """ if self._is_option_tag(): return await self.click_option_tag() @@ -247,6 +315,25 @@ async def click_using_js(self): ) async def click(self, x_offset: int = 0, y_offset: int = 0): + """ + Clicks on the element using mouse events. + + This method simulates a real mouse click by sending mouse press and + release events at the center of the element. It can also click at + an offset from the center if specified. + + Args: + x_offset (int): Horizontal offset from the center of the element. + Defaults to 0. + y_offset (int): Vertical offset from the center of the element. + Defaults to 0. 
+ + Raises: + ElementNotVisible: If the element is not visible on the page. + + Returns: + None + """ if self._is_option_tag(): return await self.click_option_tag() @@ -278,6 +365,16 @@ async def click(self, x_offset: int = 0, y_offset: int = 0): await self._connection_handler.execute_command(release_command) async def click_option_tag(self): + """ + Clicks on an option element in a select dropdown. + + This method uses a specialized JavaScript approach to select an option + in a dropdown, as option elements require different handling than + standard clickable elements. + + Returns: + None + """ script = Scripts.CLICK_OPTION_TAG.replace('{self.value}', self.value) await self._execute_command(RuntimeCommands.evaluate_script(script)) @@ -302,10 +399,31 @@ async def type_keys(self, text: str): await asyncio.sleep(0.1) def _is_option_tag(self): + """ + Checks if the element is an option tag in a select dropdown. + + Returns: + bool: True if the element is an option tag, False otherwise. + """ return self._attributes['tag_name'].lower() == 'option' @staticmethod def _calculate_center(bounds: list) -> tuple: + """ + Calculates the center point of an element from its bounding box. + + This method processes the coordinates from a bounding box list and + computes the center point, which is useful for clicking operations. + + Args: + bounds (list): A list of coordinates defining the element's + bounding box. Expected format is + a flat list of [x1, y1, ..., xn, yn]. + + Returns: + tuple: A tuple containing the (x, y) coordinates of + the center point. 
+ """ x_values = [bounds[i] for i in range(0, len(bounds), 2)] y_values = [bounds[i] for i in range(1, len(bounds), 2)] x_center = sum(x_values) / len(x_values) diff --git a/pydoll/mixins/find_elements.py b/pydoll/mixins/find_elements.py index 22914f8f..ea4dce83 100644 --- a/pydoll/mixins/find_elements.py +++ b/pydoll/mixins/find_elements.py @@ -7,7 +7,18 @@ def create_web_element(*args, **kwargs): """ - Creates a WebElement instance to avoid circular imports. + Creates a WebElement instance while avoiding circular imports. + + This function is used as a factory to create WebElement instances + by dynamically importing the WebElement class. This approach + prevents circular import issues that would occur with direct imports. + + Args: + *args: Positional arguments to pass to the WebElement constructor. + **kwargs: Keyword arguments to pass to the WebElement constructor. + + Returns: + WebElement: A new WebElement instance. """ from pydoll.element import WebElement # noqa: PLC0415 @@ -15,6 +26,14 @@ def create_web_element(*args, **kwargs): class FindElementsMixin: + """ + A mixin class that provides element finding and waiting capabilities. + + This mixin provides methods for finding elements in the DOM using various + selector strategies, waiting for elements to appear, and interacting with + elements. Classes that include this mixin will gain the ability to locate + elements in web pages. + """ async def wait_element( self, by: DomCommands.SelectorType, @@ -25,17 +44,26 @@ async def wait_element( """ Waits for an element to be present in the DOM. + This method repeatedly attempts to find an element until it is found or + the timeout is reached. It is useful for handling dynamic content that + may not be immediately available. + Args: - by (SelectorType): The type of selector to use. - value (str): The value of the selector. - timeout (int, optional): Time in seconds to wait for the element. - Defaults to 10. 
+ by (SelectorType): The type of selector to use + (e.g., 'css', 'xpath'). + value (str): The value of the selector to locate the element. + timeout (int): Maximum time in seconds to wait for the element. + Defaults to 10 seconds. + raise_exc (bool): Whether to raise an exception if the element + is not found within the timeout. Defaults to True. Returns: - Element: The element found in the DOM. + WebElement or None: The element found in the DOM, or None if + not found and raise_exc is False. Raises: - TimeoutError: If the element is not found within the timeout. + TimeoutError: If the element is not found within the timeout and + raise_exc is True. """ start_time = asyncio.get_event_loop().time() while True: @@ -59,12 +87,21 @@ async def find_element( """ Finds an element on the current page using the specified selector. + This method locates the first element matching the given selector and + returns a WebElement instance representing that element. If no element + is found, it either raises an exception or returns None, depending on + the raise_exc parameter. + Args: - by (SelectorType): The type of selector to use. - value (str): The value of the selector to use. + by (SelectorType): The type of selector to use + (e.g., 'css', 'xpath'). + value (str): The value of the selector to locate the element. + raise_exc (bool): Whether to raise an exception if the element + is not found. Defaults to True. Returns: - dict: The response from the browser. + WebElement or None: The found element as a WebElement instance, or + None if no element is found and raise_exc is False. Raises: ElementNotFound: If the element is not found and raise_exc is True. @@ -98,12 +135,22 @@ async def find_elements( """ Finds all elements on the current page using the specified selector. + This method locates all elements matching the given selector and + returns a list of WebElement instances. 
If no elements are found, + it either raises an exception or returns an empty list, depending on + the raise_exc parameter. + Args: - by (SelectorType): The type of selector to use. - value (str): The value of the selector to use. + by (SelectorType): The type of selector to use + (e.g., 'css', 'xpath'). + value (str): The value of the selector to locate the elements. + raise_exc (bool): Whether to raise an exception if no elements are + found. Defaults to True. Returns: - list: A list of elements found on the page. + list[WebElement]: A list of WebElement instances representing the + found elements. Returns an empty list if no elements are found + and raise_exc is False. Raises: ElementNotFound: If no elements are found and raise_exc is True. @@ -154,11 +201,19 @@ async def _describe_node(self, object_id: str = '') -> dict: """ Provides a detailed description of a specific node within the DOM. + This method retrieves detailed information about a DOM node using its + object ID. The information includes the node's attributes, properties, + and relationship to other nodes. + Args: - node_id (int): The unique ID of the node to describe. + object_id (str): The unique object ID of the node to describe. + Defaults to an empty string, which typically refers to the + document node. Returns: - dict: A dictionary containing the detailed description of the node. + dict: A dictionary containing the detailed description of the node, + including its attributes, properties, and other + characteristics. """ response = await self._execute_command( DomCommands.describe_node(object_id=object_id) @@ -167,13 +222,19 @@ async def _describe_node(self, object_id: str = '') -> dict: async def _execute_command(self, command: dict) -> dict: """ - Executes a command on the page. + Executes a DevTools Protocol command on the page. + + This is an internal method used to send commands to the browser and + receive responses. 
It uses the connection handler to communicate with + the browser and passes timeout=60 to accommodate potentially + time-consuming DOM operations. Args: - command (dict): The command to execute. + command (dict): The DevTools Protocol command to execute. Returns: - dict: The result of the command execution. + dict: The result of the command execution as returned by + the browser. """ return await self._connection_handler.execute_command( command, timeout=60