From feafc3f90c0f2ba3d03dc291e142f9a65d868199 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:12:22 +0200
Subject: [PATCH 01/70] Create client.ts
---
src/client.ts | 1 +
1 file changed, 1 insertion(+)
create mode 100644 src/client.ts
diff --git a/src/client.ts b/src/client.ts
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/client.ts
@@ -0,0 +1 @@
+
From 292df2dfaa889bf73eec7857593638de5653e589 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:13:46 +0200
Subject: [PATCH 02/70] Add files via upload
---
src/__init__.py | 82 +++++
src/client.py | 897 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 979 insertions(+)
create mode 100644 src/__init__.py
create mode 100644 src/client.py
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..c815a6c
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1,82 @@
+"""
+## Bright Data SDK for Python
+
+A comprehensive SDK for Bright Data's Web Scraping and SERP APIs, providing
+easy-to-use methods for web scraping, search engine result parsing, and data management.
+First import the package and create a client:
+```python
+from brightdata import bdclient
+client = bdclient("your-api-key")
+```
+## Functions:
+Then use the client to call the desired functions:
+#### scrape()
+- Scrapes a website using Bright Data Web Unlocker API with proxy support (or multiple websites in a single call)
+- syntax: `results = client.scrape(url, country, max_workers, ...)`
+#### scrape_linkedin
+- Scrapes LinkedIn data including posts, jobs, companies, and profiles, and returns structured data
+- syntax: `results = client.scrape_linkedin.posts()/jobs()/companies()/profiles() # insert parameters per function`
+#### search()
+- Performs web searches using Bright Data SERP API with customizable search engines (or multiple search queries in a single call)
+- syntax: `results = client.search(query, search_engine, country, ...)`
+#### search_linkedin
+- Searches LinkedIn for specific posts, jobs, and profiles, and returns the relevant data
+- syntax: `results = client.search_linkedin.posts()/jobs()/profiles() # insert parameters per function`
+#### search_chatGPT()
+- Interact with ChatGPT using Bright Data's ChatGPT API, sending prompts and receiving responses
+- syntax: `results = client.search_chatGPT(prompt, additional_prompt, max_workers, ...)`
+#### download_content() / download_snapshot()
+- Saves scraped content (`download_content`) or a completed snapshot (`download_snapshot`) to local files in various formats (JSON, CSV, etc.)
+- syntax: `client.download_content(results)`
+- syntax: `client.download_snapshot(snapshot_id)`
+#### connect_browser()
+- Get WebSocket endpoint for connecting to Bright Data's scraping browser with Playwright/Selenium
+- syntax: `endpoint_url = client.connect_browser()` then use with browser automation tools
+#### crawl()
+- Crawl websites to discover and scrape multiple pages using Bright Data's Web Crawl API
+- syntax: `result = client.crawl(url, filter, exclude_filter, depth, ...)`
+#### parse_content()
+- Parse and extract useful information from API responses (JSON or HTML)
+- syntax: `parsed = client.parse_content(data, extract_text=True, extract_links=True)`
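+
+For example, a minimal end-to-end sketch (the URL is illustrative and assumes a valid API token):
+```python
+results = client.scrape("https://example.com")
+parsed = client.parse_content(results, extract_text=True)
+client.download_content(results)
+```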
+
+### Features:
+- Web Scraping: Scrape websites using Bright Data Web Unlocker API with proxy support
+- Search Engine Results: Perform web searches using Bright Data SERP API
+- Web Crawling: Discover and scrape multiple pages from websites with advanced filtering
+- Content Parsing: Extract text, links, images, and structured data from API responses
+- Browser Automation: Simple authentication for Bright Data's scraping browser with Playwright/Selenium
+- Multiple Search Engines: Support for Google, Bing, and Yandex
+- Parallel Processing: Concurrent processing for multiple URLs or queries
+- Robust Error Handling: Comprehensive error handling with retry logic
+- Input Validation: Automatic validation of URLs, zone names, and parameters
+- Zone Management: Automatic zone creation and management
+- Multiple Output Formats: JSON, raw HTML, markdown, and more
+"""
+
+from .client import bdclient
+from .exceptions import (
+ BrightDataError,
+ ValidationError,
+ AuthenticationError,
+ ZoneError,
+ NetworkError,
+ APIError
+)
+from .utils import parse_content, parse_multiple, extract_structured_data
+
+__version__ = "1.1.3"
+__author__ = "Bright Data"
+__email__ = "support@brightdata.com"
+
+__all__ = [
+ 'bdclient',
+ 'BrightDataError',
+ 'ValidationError',
+ 'AuthenticationError',
+ 'ZoneError',
+ 'NetworkError',
+ 'APIError',
+ 'parse_content',
+ 'parse_multiple',
+ 'extract_structured_data'
+]
\ No newline at end of file
diff --git a/src/client.py b/src/client.py
new file mode 100644
index 0000000..b148792
--- /dev/null
+++ b/src/client.py
@@ -0,0 +1,897 @@
+import os
+import json
+import requests
+from datetime import datetime
+from typing import Union, Dict, Any, List
+
+from .api import WebScraper, SearchAPI
+from .api.chatgpt import ChatGPTAPI
+from .api.linkedin import LinkedInAPI, LinkedInScraper, LinkedInSearcher
+from .api.download import DownloadAPI
+from .api.crawl import CrawlAPI
+from .api.extract import ExtractAPI
+from .utils import ZoneManager, setup_logging, get_logger, parse_content
+from .exceptions import ValidationError, AuthenticationError, APIError
+
+def _get_version():
+ """Get version from __init__.py, cached at module import time."""
+ try:
+ import os
+ init_file = os.path.join(os.path.dirname(__file__), '__init__.py')
+ with open(init_file, 'r', encoding='utf-8') as f:
+ for line in f:
+ if line.startswith('__version__'):
+ return line.split('"')[1]
+ except (OSError, IndexError):
+ pass
+ return "unknown"
+
+__version__ = _get_version()
+
+logger = get_logger('client')
+
+
+class bdclient:
+ """Main client for the Bright Data SDK"""
+
+ DEFAULT_MAX_WORKERS = 10
+ DEFAULT_TIMEOUT = 65
+ CONNECTION_POOL_SIZE = 20
+ MAX_RETRIES = 3
+ RETRY_BACKOFF_FACTOR = 1.5
+ RETRY_STATUSES = {429, 500, 502, 503, 504}
+
+ def __init__(
+ self,
+ api_token: str = None,
+ auto_create_zones: bool = True,
+ web_unlocker_zone: str = None,
+ serp_zone: str = None,
+ browser_zone: str = None,
+ browser_username: str = None,
+ browser_password: str = None,
+ browser_type: str = "playwright",
+ log_level: str = "INFO",
+ structured_logging: bool = True,
+ verbose: bool = None
+ ):
+ """
+ Initialize the Bright Data client with your API token
+
+ Create an account at https://brightdata.com/ to get your API token.
+ Go to Settings > API keys and verify that your API key has "Admin" permissions.
+
+ Args:
+ api_token: Your Bright Data API token (can also be set via BRIGHTDATA_API_TOKEN env var)
+ auto_create_zones: Automatically create required zones if they don't exist (default: True)
+ web_unlocker_zone: Custom zone name for web unlocker (default: from env or 'sdk_unlocker')
+ serp_zone: Custom zone name for SERP API (default: from env or 'sdk_serp')
+ browser_zone: Custom zone name for Browser API (default: from env or 'sdk_browser')
+ browser_username: Username for Browser API in format "username-zone-{zone_name}" (can also be set via BRIGHTDATA_BROWSER_USERNAME env var)
+ browser_password: Password for Browser API authentication (can also be set via BRIGHTDATA_BROWSER_PASSWORD env var)
+ browser_type: Browser automation tool type - "playwright", "puppeteer", or "selenium" (default: "playwright")
+ log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+ structured_logging: Whether to use structured JSON logging (default: True)
+ verbose: Enable verbose logging (default: False). Can also be set via BRIGHTDATA_VERBOSE env var.
+ When False, only shows WARNING and above. When True, shows all logs per log_level.
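+
+ Example (a minimal sketch; the token value is a placeholder):
+ ```python
+ client = bdclient(api_token="your_api_token")
+ # or rely on the BRIGHTDATA_API_TOKEN environment variable
+ client = bdclient()
+ ```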
+ """
+ try:
+ from dotenv import load_dotenv
+ load_dotenv()
+ except ImportError:
+ pass
+
+ if verbose is None:
+ env_verbose = os.getenv('BRIGHTDATA_VERBOSE', '').lower()
+ verbose = env_verbose in ('true', '1', 'yes', 'on')
+
+ setup_logging(log_level, structured_logging, verbose)
+ logger.info("Initializing Bright Data SDK client")
+
+ self.api_token = api_token or os.getenv('BRIGHTDATA_API_TOKEN')
+ if not self.api_token:
+ logger.error("API token not provided")
+ raise ValidationError("API token is required. Provide it as parameter or set BRIGHTDATA_API_TOKEN environment variable")
+
+ if not isinstance(self.api_token, str):
+ logger.error("API token must be a string")
+ raise ValidationError("API token must be a string")
+
+ if len(self.api_token.strip()) < 10:
+ logger.error("API token appears to be invalid (too short)")
+ raise ValidationError("API token appears to be invalid")
+
+ token_preview = f"{self.api_token[:4]}***{self.api_token[-4:]}" if len(self.api_token) > 8 else "***"
+ logger.info(f"API token validated successfully: {token_preview}")
+
+ self.web_unlocker_zone = web_unlocker_zone or os.getenv('WEB_UNLOCKER_ZONE', 'sdk_unlocker')
+ self.serp_zone = serp_zone or os.getenv('SERP_ZONE', 'sdk_serp')
+ self.browser_zone = browser_zone or os.getenv('BROWSER_ZONE', 'sdk_browser')
+ self.auto_create_zones = auto_create_zones
+
+ self.browser_username = browser_username or os.getenv('BRIGHTDATA_BROWSER_USERNAME')
+ self.browser_password = browser_password or os.getenv('BRIGHTDATA_BROWSER_PASSWORD')
+
+
+
+ valid_browser_types = ["playwright", "puppeteer", "selenium"]
+ if browser_type not in valid_browser_types:
+ raise ValidationError(f"Invalid browser_type '{browser_type}'. Must be one of: {valid_browser_types}")
+ self.browser_type = browser_type
+
+ if self.browser_username and self.browser_password:
+ browser_preview = f"{self.browser_username[:3]}***"
+ logger.info(f"Browser credentials configured: {browser_preview} (type: {self.browser_type})")
+ elif self.browser_username or self.browser_password:
+ logger.warning("Incomplete browser credentials: both username and password are required for browser API")
+ else:
+ logger.debug("No browser credentials provided - browser API will not be available")
+
+ self.session = requests.Session()
+
+ auth_header = f'Bearer {self.api_token}'
+ self.session.headers.update({
+ 'Authorization': auth_header,
+ 'Content-Type': 'application/json',
+ 'User-Agent': f'brightdata-sdk/{__version__}'
+ })
+
+ logger.info("HTTP session configured with secure headers")
+
+ adapter = requests.adapters.HTTPAdapter(
+ pool_connections=self.CONNECTION_POOL_SIZE,
+ pool_maxsize=self.CONNECTION_POOL_SIZE,
+ max_retries=0
+ )
+ self.session.mount('https://', adapter)
+ self.session.mount('http://', adapter)
+
+ self.zone_manager = ZoneManager(self.session)
+ self.web_scraper = WebScraper(
+ self.session,
+ self.DEFAULT_TIMEOUT,
+ self.MAX_RETRIES,
+ self.RETRY_BACKOFF_FACTOR
+ )
+ self.search_api = SearchAPI(
+ self.session,
+ self.DEFAULT_TIMEOUT,
+ self.MAX_RETRIES,
+ self.RETRY_BACKOFF_FACTOR
+ )
+ self.chatgpt_api = ChatGPTAPI(
+ self.session,
+ self.api_token,
+ self.DEFAULT_TIMEOUT,
+ self.MAX_RETRIES,
+ self.RETRY_BACKOFF_FACTOR
+ )
+ self.linkedin_api = LinkedInAPI(
+ self.session,
+ self.api_token,
+ self.DEFAULT_TIMEOUT,
+ self.MAX_RETRIES,
+ self.RETRY_BACKOFF_FACTOR
+ )
+ self.download_api = DownloadAPI(
+ self.session,
+ self.api_token,
+ self.DEFAULT_TIMEOUT
+ )
+ self.crawl_api = CrawlAPI(
+ self.session,
+ self.api_token,
+ self.DEFAULT_TIMEOUT,
+ self.MAX_RETRIES,
+ self.RETRY_BACKOFF_FACTOR
+ )
+ self.extract_api = ExtractAPI(self)
+
+ if self.auto_create_zones:
+ self.zone_manager.ensure_required_zones(
+ self.web_unlocker_zone,
+ self.serp_zone
+ )
+
+ def scrape(
+ self,
+ url: Union[str, List[str]],
+ zone: str = None,
+ response_format: str = "raw",
+ method: str = "GET",
+ country: str = "",
+ data_format: str = "html",
+ async_request: bool = False,
+ max_workers: int = None,
+ timeout: int = None
+ ) -> Union[Dict[str, Any], str, List[Union[Dict[str, Any], str]]]:
+ """
+ ## Unlock and scrape websites using Bright Data Web Unlocker API
+
+ Scrapes one or multiple URLs through Bright Data's proxy network with anti-bot detection bypass.
+
+ ### Parameters:
+ - `url` (str | List[str]): Single URL string or list of URLs to scrape
+ - `zone` (str, optional): Zone identifier (default: auto-configured web_unlocker_zone)
+ - `response_format` (str, optional): Response format - `"json"` for structured data, `"raw"` for HTML string (default: `"raw"`)
+ - `method` (str, optional): HTTP method for the request (default: `"GET"`)
+ - `country` (str, optional): Two-letter ISO country code for proxy location (defaults to fastest connection)
+ - `data_format` (str, optional): Additional format transformation (default: `"html"`)
+ - `async_request` (bool, optional): Enable asynchronous processing (default: `False`)
+ - `max_workers` (int, optional): Maximum parallel workers for multiple URLs (default: `10`)
+ - `timeout` (int, optional): Request timeout in seconds (default: `65`)
+
+ ### Returns:
+ - Single URL: `Dict[str, Any]` if `response_format="json"`, `str` if `response_format="raw"`
+ - Multiple URLs: `List[Union[Dict[str, Any], str]]` corresponding to each input URL
+
+ ### Example Usage:
+ ```python
+ # Single URL scraping
+ result = client.scrape(
+ url="https://example.com",
+ response_format="json"
+ )
+
+ # Multiple URLs scraping
+ urls = ["https://site1.com", "https://site2.com"]
+ results = client.scrape(
+ url=urls,
+ response_format="raw",
+ max_workers=5
+ )
+ ```
+
+ ### Raises:
+ - `ValidationError`: Invalid URL format or empty URL list
+ - `AuthenticationError`: Invalid API token or insufficient permissions
+ - `APIError`: Request failed or server error
+ """
+ zone = zone or self.web_unlocker_zone
+ max_workers = max_workers or self.DEFAULT_MAX_WORKERS
+
+ return self.web_scraper.scrape(
+ url, zone, response_format, method, country, data_format,
+ async_request, max_workers, timeout
+ )
+
+ def search(
+ self,
+ query: Union[str, List[str]],
+ search_engine: str = "google",
+ zone: str = None,
+ response_format: str = "raw",
+ method: str = "GET",
+ country: str = "",
+ data_format: str = "html",
+ async_request: bool = False,
+ max_workers: int = None,
+ timeout: int = None,
+ parse: bool = False
+ ) -> Union[Dict[str, Any], str, List[Union[Dict[str, Any], str]]]:
+ """
+ ## Search the web using Bright Data SERP API
+
+ Performs web searches through major search engines using Bright Data's proxy network
+ for reliable, bot-detection-free results.
+
+ ### Parameters:
+ - `query` (str | List[str]): Search query string or list of search queries
+ - `search_engine` (str, optional): Search engine to use - `"google"`, `"bing"`, or `"yandex"` (default: `"google"`)
+ - `zone` (str, optional): Zone identifier (default: auto-configured serp_zone)
+ - `response_format` (str, optional): Response format - `"json"` for structured data, `"raw"` for HTML string (default: `"raw"`)
+ - `method` (str, optional): HTTP method for the request (default: `"GET"`)
+ - `country` (str, optional): Two-letter ISO country code for proxy location (default: `"us"`)
+ - `data_format` (str, optional): Additional format transformation (default: `"html"`)
+ - `async_request` (bool, optional): Enable asynchronous processing (default: `False`)
+ - `max_workers` (int, optional): Maximum parallel workers for multiple queries (default: `10`)
+ - `timeout` (int, optional): Request timeout in seconds (default: `65`)
+ - `parse` (bool, optional): Enable JSON parsing by adding brd_json=1 to URL (default: `False`)
+
+ ### Returns:
+ - Single query: `Dict[str, Any]` if `response_format="json"`, `str` if `response_format="raw"`
+ - Multiple queries: `List[Union[Dict[str, Any], str]]` corresponding to each input query
+
+ ### Example Usage:
+ ```python
+ # Single search query
+ result = client.search(
+ query="best laptops 2024",
+ search_engine="google",
+ response_format="json"
+ )
+
+ # Multiple search queries
+ queries = ["python tutorials", "machine learning courses", "web development"]
+ results = client.search(
+ query=queries,
+ search_engine="bing",
+ max_workers=3
+ )
+ ```
+
+ ### Supported Search Engines:
+ - `"google"` - Google Search
+ - `"bing"` - Microsoft Bing
+ - `"yandex"` - Yandex Search
+
+ ### Raises:
+ - `ValidationError`: Invalid search engine, empty query, or validation errors
+ - `AuthenticationError`: Invalid API token or insufficient permissions
+ - `APIError`: Request failed or server error
+ """
+ zone = zone or self.serp_zone
+ max_workers = max_workers or self.DEFAULT_MAX_WORKERS
+
+ return self.search_api.search(
+ query, search_engine, zone, response_format, method, country,
+ data_format, async_request, max_workers, timeout, parse
+ )
+
+ def download_content(self, content: Union[Dict, str], filename: str = None, format: str = "json", parse: bool = False) -> str:
+ """
+ ## Download content to a file based on its format
+
+ ### Args:
+ content: The content to download (dict for JSON, string for other formats)
+ filename: Optional filename. If not provided, generates one with timestamp
+ format: Format of the content ("json", "csv", "ndjson", "jsonl", "txt")
+ parse: If True, automatically parse JSON strings in 'body' fields to objects (default: False)
+
+ ### Returns:
+ Path to the downloaded file
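+
+ ### Example Usage:
+ A minimal sketch (assumes `results` came from a prior `client.scrape()` call):
+ ```python
+ results = client.scrape("https://example.com", response_format="json")
+ path = client.download_content(results, filename="results.json", format="json")
+ print(f"Saved to {path}")
+ ```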
+ """
+ return self.download_api.download_content(content, filename, format, parse)
+
+
+ def search_chatGPT(
+ self,
+ prompt: Union[str, List[str]],
+ country: Union[str, List[str]] = "",
+ additional_prompt: Union[str, List[str]] = "",
+ web_search: Union[bool, List[bool]] = False,
+ sync: bool = True
+ ) -> Dict[str, Any]:
+ """
+ ## Search ChatGPT responses using Bright Data's ChatGPT dataset API
+
+ Sends one or multiple prompts to ChatGPT through Bright Data's proxy network
+ with support for both synchronous and asynchronous processing.
+
+ ### Parameters:
+ - `prompt` (str | List[str]): Single prompt string or list of prompts to send to ChatGPT
+ - `country` (str | List[str], optional): Two-letter ISO country code(s) for proxy location (default: "")
+ - `additional_prompt` (str | List[str], optional): Follow-up prompt(s) after receiving the first answer (default: "")
+ - `web_search` (bool | List[bool], optional): Whether to click the web search button in ChatGPT (default: False)
+ - `sync` (bool, optional): If True (default), returns data immediately. If False, returns snapshot_id for async processing
+
+ ### Returns:
+ - `Dict[str, Any]`: If sync=True, returns ChatGPT response data directly. If sync=False, returns response with snapshot_id for async processing
+
+ ### Example Usage:
+ ```python
+ # Single prompt (synchronous - returns data immediately)
+ result = client.search_chatGPT(prompt="Top hotels in New York")
+
+ # Multiple prompts (synchronous - returns data immediately)
+ result = client.search_chatGPT(
+ prompt=["Top hotels in New York", "Best restaurants in Paris", "Tourist attractions in Tokyo"],
+ additional_prompt=["Are you sure?", "", "What about hidden gems?"]
+ )
+
+ # Asynchronous with web search enabled (returns snapshot_id)
+ result = client.search_chatGPT(
+ prompt="Latest AI developments",
+ web_search=True,
+ sync=False
+ )
+ # Snapshot ID is automatically printed for async requests
+ ```
+
+ ### Raises:
+ - `ValidationError`: Invalid prompt or parameters
+ - `AuthenticationError`: Invalid API token or insufficient permissions
+ - `APIError`: Request failed or server error
+ """
+ if isinstance(prompt, str):
+ prompts = [prompt]
+ else:
+ prompts = prompt
+
+ if not prompts or len(prompts) == 0:
+ raise ValidationError("At least one prompt is required")
+
+ for p in prompts:
+ if not p or not isinstance(p, str):
+ raise ValidationError("All prompts must be non-empty strings")
+
+ def normalize_param(param, param_name):
+ if isinstance(param, list):
+ if len(param) != len(prompts):
+ raise ValidationError(f"{param_name} list must have same length as prompts list")
+ return param
+ else:
+ return [param] * len(prompts)
+
+ countries = normalize_param(country, "country")
+ additional_prompts = normalize_param(additional_prompt, "additional_prompt")
+ web_searches = normalize_param(web_search, "web_search")
+
+ for c in countries:
+ if not isinstance(c, str):
+ raise ValidationError("All countries must be strings")
+
+ for ap in additional_prompts:
+ if not isinstance(ap, str):
+ raise ValidationError("All additional_prompts must be strings")
+
+ for ws in web_searches:
+ if not isinstance(ws, bool):
+ raise ValidationError("All web_search values must be booleans")
+
+ return self.chatgpt_api.scrape_chatgpt(
+ prompts,
+ countries,
+ additional_prompts,
+ web_searches,
+ sync,
+ self.DEFAULT_TIMEOUT
+ )
+
+ @property
+ def scrape_linkedin(self):
+ """
+ ## LinkedIn Data Scraping Interface
+
+ Provides specialized methods for scraping different types of LinkedIn data
+ using Bright Data's collect API with pre-configured dataset IDs.
+
+ ### Available Methods:
+ - `profiles(url)` - Scrape LinkedIn profile data
+ - `companies(url)` - Scrape LinkedIn company data
+ - `jobs(url)` - Scrape LinkedIn job listing data
+ - `posts(url)` - Scrape LinkedIn post content
+
+ ### Example Usage:
+ ```python
+ # Scrape LinkedIn profiles
+ result = client.scrape_linkedin.profiles("https://www.linkedin.com/in/username/")
+
+ # Scrape multiple companies
+ companies = [
+ "https://www.linkedin.com/company/ibm",
+ "https://www.linkedin.com/company/bright-data"
+ ]
+ result = client.scrape_linkedin.companies(companies)
+
+ # Scrape job listings
+ result = client.scrape_linkedin.jobs("https://www.linkedin.com/jobs/view/123456/")
+
+ # Scrape posts
+ result = client.scrape_linkedin.posts("https://www.linkedin.com/posts/user-activity-123/")
+ ```
+
+ ### Returns:
+ Each method returns a `Dict[str, Any]` containing snapshot_id and metadata for tracking the request.
+ Use the snapshot_id with `download_snapshot()` to retrieve the collected data.
+ """
+ if not hasattr(self, '_linkedin_scraper'):
+ self._linkedin_scraper = LinkedInScraper(self.linkedin_api)
+ return self._linkedin_scraper
+
+ @property
+ def search_linkedin(self):
+ """
+ ## LinkedIn Data Search Interface
+
+ Provides specialized methods for discovering new LinkedIn data by various search criteria
+ using Bright Data's collect API with pre-configured dataset IDs.
+
+ ### Available Methods:
+ - `profiles(first_name, last_name)` - Search LinkedIn profiles by name
+ - `jobs(url=..., location=...)` - Search LinkedIn jobs by URL or keyword criteria
+ - `posts(profile_url=..., company_url=..., url=...)` - Search LinkedIn posts by various methods
+
+ ### Example Usage:
+ ```python
+ # Search profiles by name
+ result = client.search_linkedin.profiles("James", "Smith")
+
+ # Search jobs by location and keywords
+ result = client.search_linkedin.jobs(
+ location="Paris",
+ keyword="product manager",
+ country="FR"
+ )
+
+ # Search posts by profile URL with date range
+ result = client.search_linkedin.posts(
+ profile_url="https://www.linkedin.com/in/username",
+ start_date="2018-04-25T00:00:00.000Z",
+ end_date="2021-05-25T00:00:00.000Z"
+ )
+ ```
+
+ ### Returns:
+ Each method returns a `Dict[str, Any]` containing snapshot_id (async) or direct data (sync) for tracking the request.
+ Use the snapshot_id with `download_snapshot()` to retrieve the collected data.
+ """
+ if not hasattr(self, '_linkedin_searcher'):
+ self._linkedin_searcher = LinkedInSearcher(self.linkedin_api)
+ return self._linkedin_searcher
+
+ def download_snapshot(
+ self,
+ snapshot_id: str,
+ format: str = "json",
+ compress: bool = False,
+ batch_size: int = None,
+ part: int = None
+ ) -> Union[Dict[str, Any], List[Dict[str, Any]], str]:
+ """
+ ## Download snapshot content from Bright Data dataset API
+
+ Downloads the snapshot content using the snapshot ID returned from search_chatGPT()
+ or other dataset collection triggers.
+
+ ### Parameters:
+ - `snapshot_id` (str): The snapshot ID returned when collection was triggered (required)
+ - `format` (str, optional): Format of the data - "json", "ndjson", "jsonl", or "csv" (default: "json")
+ - `compress` (bool, optional): Whether the result should be compressed (default: False)
+ - `batch_size` (int, optional): Divide into batches of X records (minimum: 1000)
+ - `part` (int, optional): If batch_size provided, specify which part to download
+
+ ### Returns:
+ - `Union[Dict, List, str]`: Snapshot data in the requested format, OR
+ - `Dict`: Status response if snapshot is not ready yet (status="not_ready")
+
+ ### Example Usage:
+ ```python
+ # Download complete snapshot
+ result = client.download_snapshot("s_m4x7enmven8djfqak")
+
+ # Check if snapshot is ready
+ if isinstance(result, dict) and result.get('status') == 'not_ready':
+ print(f"Not ready: {result['message']}")
+ # Try again later
+ else:
+ # Snapshot data is ready
+ data = result
+
+ # Download as CSV format
+ csv_data = client.download_snapshot("s_m4x7enmven8djfqak", format="csv")
+ ```
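+
+ A simple polling sketch (the 30-second interval and the snapshot ID are illustrative):
+ ```python
+ import time
+
+ result = client.download_snapshot("s_m4x7enmven8djfqak")
+ while isinstance(result, dict) and result.get('status') == 'not_ready':
+     time.sleep(30)
+     result = client.download_snapshot("s_m4x7enmven8djfqak")
+ ```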
+
+ ### Raises:
+ - `ValidationError`: Invalid parameters or snapshot_id format
+ - `AuthenticationError`: Invalid API token or insufficient permissions
+ - `APIError`: Request failed, snapshot not found, or server error
+ """
+ return self.download_api.download_snapshot(snapshot_id, format, compress, batch_size, part)
+
+
+ def list_zones(self) -> List[Dict[str, Any]]:
+ """
+ ## List all active zones in your Bright Data account
+
+ ### Returns:
+ List of zone dictionaries with their configurations
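+
+ ### Example Usage:
+ A short sketch (the printed fields depend on your account's zone configuration):
+ ```python
+ for zone in client.list_zones():
+     print(zone)
+ ```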
+ """
+ return self.zone_manager.list_zones()
+
+ def connect_browser(self) -> str:
+ """
+ ## Get WebSocket endpoint URL for connecting to Bright Data's scraping browser
+
+ Returns the WebSocket endpoint URL that can be used with Playwright or Selenium
+ to connect to Bright Data's scraping browser service.
+
+ ### Returns:
+ WebSocket endpoint URL string for browser connection
+
+ ### Example Usage:
+ ```python
+ # For Playwright (default)
+ client = bdclient(
+ api_token="your_token",
+ browser_username="username-zone-browser_zone1",
+ browser_password="your_password",
+ browser_type="playwright"  # default; also used for Puppeteer
+ )
+ endpoint_url = client.connect_browser() # Returns: wss://...@brd.superproxy.io:9222
+
+ # For Selenium
+ client = bdclient(
+ api_token="your_token",
+ browser_username="username-zone-browser_zone1",
+ browser_password="your_password",
+ browser_type="selenium"
+ )
+ endpoint_url = client.connect_browser() # Returns: https://...@brd.superproxy.io:9515
+ ```
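+
+ A connection sketch (assumes Playwright is installed; `connect_over_cdp` is Playwright's standard CDP entry point):
+ ```python
+ from playwright.sync_api import sync_playwright
+
+ endpoint_url = client.connect_browser()
+ with sync_playwright() as p:
+     browser = p.chromium.connect_over_cdp(endpoint_url)
+     page = browser.new_page()
+     page.goto("https://example.com")
+     print(page.title())
+ ```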
+
+ ### Raises:
+ - `ValidationError`: Browser credentials not provided or invalid
+ - `AuthenticationError`: Invalid browser credentials
+ """
+ if not self.browser_username or not self.browser_password:
+ logger.error("Browser credentials not configured")
+ raise ValidationError(
+ "Browser credentials are required. Provide browser_username and browser_password "
+ "parameters or set BRIGHTDATA_BROWSER_USERNAME and BRIGHTDATA_BROWSER_PASSWORD "
+ "environment variables."
+ )
+
+ if not isinstance(self.browser_username, str) or not isinstance(self.browser_password, str):
+ logger.error("Browser credentials must be strings")
+ raise ValidationError("Browser username and password must be strings")
+
+ if len(self.browser_username.strip()) == 0 or len(self.browser_password.strip()) == 0:
+ logger.error("Browser credentials cannot be empty")
+ raise ValidationError("Browser username and password cannot be empty")
+
+ auth_string = f"{self.browser_username}:{self.browser_password}"
+
+ if self.browser_type == "selenium":
+ endpoint_url = f"https://{auth_string}@brd.superproxy.io:9515"
+ logger.debug(f"Browser endpoint URL: https://***:***@brd.superproxy.io:9515")
+ else:
+ endpoint_url = f"wss://{auth_string}@brd.superproxy.io:9222"
+ logger.debug(f"Browser endpoint URL: wss://***:***@brd.superproxy.io:9222")
+
+ logger.info(f"Generated {self.browser_type} connection endpoint for user: {self.browser_username[:3]}***")
+
+ return endpoint_url
+
+ def crawl(
+ self,
+ url: Union[str, List[str]],
+ ignore_sitemap: bool = None,
+ depth: int = None,
+ filter: str = None,
+ exclude_filter: str = None,
+ custom_output_fields: List[str] = None,
+ include_errors: bool = True
+ ) -> Dict[str, Any]:
+ """
+ ## Crawl websites using Bright Data's Web Crawl API
+
+ Performs web crawling to discover and scrape multiple pages from a website
+ starting from the specified URL(s). Returns a snapshot_id for tracking the crawl progress.
+
+ ### Parameters:
+ - `url` (str | List[str]): Domain URL(s) to crawl (required)
+ - `ignore_sitemap` (bool, optional): Ignore sitemap when crawling
+ - `depth` (int, optional): Maximum depth to crawl relative to the entered URL
+ - `filter` (str, optional): Regular expression to include only certain URLs (e.g. "/product/")
+ - `exclude_filter` (str, optional): Regular expression to exclude certain URLs (e.g. "/ads/")
+ - `custom_output_fields` (List[str], optional): Custom output schema fields to include
+ - `include_errors` (bool, optional): Include errors in response (default: True)
+
+ ### Returns:
+ - `Dict[str, Any]`: Crawl response with snapshot_id for tracking
+
+ ### Example Usage:
+ ```python
+ # Single URL crawl
+ result = client.crawl("https://example.com/")
+ snapshot_id = result['snapshot_id']
+
+ # Multiple URLs with filters
+ urls = ["https://example.com/", "https://example2.com/"]
+ result = client.crawl(
+ url=urls,
+ filter="/product/",
+ exclude_filter="/ads/",
+ depth=2,
+ ignore_sitemap=True
+ )
+
+ # Custom output schema
+ result = client.crawl(
+ url="https://example.com/",
+ custom_output_fields=["markdown", "url", "page_title"]
+ )
+
+ # Download results using snapshot_id
+ data = client.download_snapshot(result['snapshot_id'])
+ ```
+
+ ### Available Output Fields:
+ - `markdown` - Page content in markdown format
+ - `url` - Page URL
+ - `html2text` - Page content as plain text
+ - `page_html` - Raw HTML content
+ - `ld_json` - Structured data (JSON-LD)
+ - `page_title` - Page title
+ - `timestamp` - Crawl timestamp
+ - `input` - Input parameters used
+ - `discovery_input` - Discovery parameters
+ - `error` - Error information (if any)
+ - `error_code` - Error code (if any)
+ - `warning` - Warning information (if any)
+ - `warning_code` - Warning code (if any)
+
+ ### Raises:
+ - `ValidationError`: Invalid URL or parameters
+ - `AuthenticationError`: Invalid API token or insufficient permissions
+ - `APIError`: Request failed or server error
+ """
+ return self.crawl_api.crawl(
+ url=url,
+ ignore_sitemap=ignore_sitemap,
+ depth=depth,
+ filter=filter,
+ exclude_filter=exclude_filter,
+ custom_output_fields=custom_output_fields,
+ include_errors=include_errors
+ )
+
+ def parse_content(
+ self,
+ data: Union[str, Dict, List],
+ extract_text: bool = True,
+ extract_links: bool = False,
+ extract_images: bool = False
+ ) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
+ """
+ ## Parse content from API responses
+
+ Extract and parse useful information from scraping, search, or crawling results.
+ Automatically detects and handles both single and multiple results from batch operations.
+
+ ### Parameters:
+ - `data` (str | Dict | List): Response data from scrape(), search(), or crawl() methods
+ - `extract_text` (bool, optional): Extract clean text content (default: True)
+ - `extract_links` (bool, optional): Extract all links from content (default: False)
+ - `extract_images` (bool, optional): Extract image URLs from content (default: False)
+
+ ### Returns:
+ - `Dict[str, Any]`: Parsed content for single results
+ - `List[Dict[str, Any]]`: List of parsed content for multiple results (auto-detected)
+
+ ### Example Usage:
+ ```python
+ # Parse single URL results
+ scraped_data = client.scrape("https://example.com")
+ parsed = client.parse_content(scraped_data, extract_text=True, extract_links=True)
+ print(f"Title: {parsed['title']}")
+
+ # Parse multiple URL results (auto-detected)
+ scraped_data = client.scrape(["https://example1.com", "https://example2.com"])
+ parsed_list = client.parse_content(scraped_data, extract_text=True)
+ for result in parsed_list:
+ print(f"Title: {result['title']}")
+ ```
+
+ ### Available Fields in Each Result:
+ - `type`: 'json' or 'html' - indicates the source data type
+ - `text`: Cleaned text content (if extract_text=True)
+ - `links`: List of {'url': str, 'text': str} objects (if extract_links=True)
+ - `images`: List of {'url': str, 'alt': str} objects (if extract_images=True)
+ - `title`: Page title (if available)
+ - `raw_length`: Length of original content
+ - `structured_data`: Original JSON data (if type='json')
+ """
+ return parse_content(
+ data=data,
+ extract_text=extract_text,
+ extract_links=extract_links,
+ extract_images=extract_images
+ )
+
+ def extract(self, query: str, url: Union[str, List[str]] = None, output_scheme: Dict[str, Any] = None, llm_key: str = None) -> str:
+ """
+ ## Extract specific information from websites using AI
+
+ Combines web scraping with OpenAI's language models to extract targeted information
+ from web pages based on natural language queries. Automatically parses URLs and
+ optimizes content for efficient LLM processing.
+
+ ### Parameters:
+ - `query` (str): Natural language query describing what to extract. If `url` parameter is provided,
+ this becomes the pure extraction query. If `url` is not provided, this should include
+ the URL (e.g. "extract the most recent news from cnn.com")
+ - `url` (str | List[str], optional): Direct URL(s) to scrape. If provided, bypasses URL extraction
+ from query and sends these URLs to the web unlocker API
+ - `output_scheme` (dict, optional): JSON Schema defining the expected structure for the LLM response.
+ Uses OpenAI's Structured Outputs for reliable type-safe responses.
+ Example: {"type": "object", "properties": {"title": {"type": "string"}, "date": {"type": "string"}}, "required": ["title", "date"]}
+ - `llm_key` (str, optional): OpenAI API key. If not provided, uses OPENAI_API_KEY env variable
+
+ ### Returns:
+ - `str`: Extracted content (also provides access to metadata via attributes)
+
+ ### Example Usage:
+ ```python
+ # Using URL parameter with structured output (new)
+ result = client.extract(
+ query="extract the most recent news headlines",
+ url="https://cnn.com",
+ output_scheme={
+ "type": "object",
+ "properties": {
+ "headlines": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "title": {"type": "string"},
+ "date": {"type": "string"}
+ },
+ "required": ["title", "date"]
+ }
+ }
+ },
+ "required": ["headlines"]
+ }
+ )
+ print(result) # Prints the extracted news content
+
+ # Using URL in query (original behavior)
+ result = client.extract("extract the most recent news from cnn.com")
+
+ # Multiple URLs with structured schema
+ result = client.extract(
+ query="extract main headlines",
+ url=["https://cnn.com", "https://bbc.com"],
+ output_scheme={
+ "type": "object",
+ "properties": {
+ "sources": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "source_name": {"type": "string"},
+ "headlines": {"type": "array", "items": {"type": "string"}}
+ },
+ "required": ["source_name", "headlines"]
+ }
+ }
+ },
+ "required": ["sources"]
+ }
+ )
+
+ # Access metadata attributes
+ print(f"Source: {result.url}")
+ print(f"Title: {result.source_title}")
+ print(f"Tokens used: {result.token_usage['total_tokens']}")
+
+ # Use with custom OpenAI key
+ result = client.extract(
+ query="get the price and description",
+ url="https://amazon.com/dp/B079QHML21",
+ llm_key="your-openai-api-key"
+ )
+ ```
+
+ ### Environment Variable Setup:
+ ```bash
+ # Set in .env file
+ OPENAI_API_KEY=your-openai-api-key
+ ```
+
+ ### Available Attributes:
+ ```python
+ result = client.extract("extract news from cnn.com")
+
+ # String value (default behavior)
+ str(result) # Extracted content
+
+ # Metadata attributes
+ result.query # 'extract news'
+ result.url # 'https://www.cnn.com'
+ result.source_title # 'CNN - Breaking News...'
+ result.content_length # 1234
+ result.token_usage # {'total_tokens': 2998, ...}
+ result.success # True
+ result.metadata # Full metadata dictionary
+ ```
+
+ ### Raises:
+ - `ValidationError`: Invalid query format, missing URL, or invalid LLM key
+ - `APIError`: Web scraping failed or LLM processing error
+ """
+ return self.extract_api.extract(query, url, output_scheme, llm_key)
\ No newline at end of file
From cc78cc0c18fde28e3c2222f7702ca791bb03913b Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:14:03 +0200
Subject: [PATCH 03/70] Delete src/client.ts
---
src/client.ts | 1 -
1 file changed, 1 deletion(-)
delete mode 100644 src/client.ts
diff --git a/src/client.ts b/src/client.ts
deleted file mode 100644
index 8b13789..0000000
--- a/src/client.ts
+++ /dev/null
@@ -1 +0,0 @@
-
From 94cce094c221c5168796ca3890d1c52e95ee5bfc Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:14:58 +0200
Subject: [PATCH 04/70] Create tst
---
src/api/tst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 src/api/tst
diff --git a/src/api/tst b/src/api/tst
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/api/tst
@@ -0,0 +1 @@
+
From db06632ab8569fb6860837d17d41265be3c3dce2 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:15:23 +0200
Subject: [PATCH 05/70] Add files via upload
---
src/api/__init__.py | 13 +
src/api/chatgpt.py | 126 +++++++
src/api/crawl.py | 175 ++++++++++
src/api/download.py | 265 +++++++++++++++
src/api/extract.py | 419 +++++++++++++++++++++++
src/api/linkedin.py | 803 ++++++++++++++++++++++++++++++++++++++++++++
src/api/scraper.py | 205 +++++++++++
src/api/search.py | 212 ++++++++++++
8 files changed, 2218 insertions(+)
create mode 100644 src/api/__init__.py
create mode 100644 src/api/chatgpt.py
create mode 100644 src/api/crawl.py
create mode 100644 src/api/download.py
create mode 100644 src/api/extract.py
create mode 100644 src/api/linkedin.py
create mode 100644 src/api/scraper.py
create mode 100644 src/api/search.py
diff --git a/src/api/__init__.py b/src/api/__init__.py
new file mode 100644
index 0000000..a79c0fd
--- /dev/null
+++ b/src/api/__init__.py
@@ -0,0 +1,13 @@
+from .scraper import WebScraper
+from .search import SearchAPI
+from .chatgpt import ChatGPTAPI
+from .linkedin import LinkedInAPI
+from .crawl import CrawlAPI
+
+__all__ = [
+ 'WebScraper',
+ 'SearchAPI',
+ 'ChatGPTAPI',
+ 'LinkedInAPI',
+ 'CrawlAPI'
+]
\ No newline at end of file
diff --git a/src/api/chatgpt.py b/src/api/chatgpt.py
new file mode 100644
index 0000000..e9edb90
--- /dev/null
+++ b/src/api/chatgpt.py
@@ -0,0 +1,126 @@
+import json
+import requests
+from typing import Union, Dict, Any, List
+
+from ..utils import get_logger
+from ..exceptions import ValidationError, APIError, AuthenticationError
+
+logger = get_logger('api.chatgpt')
+
+
+class ChatGPTAPI:
+ """Handles ChatGPT scraping operations using Bright Data's ChatGPT dataset API"""
+
+ def __init__(self, session, api_token, default_timeout=30, max_retries=3, retry_backoff=1.5):
+ self.session = session
+ self.api_token = api_token
+ self.default_timeout = default_timeout
+ self.max_retries = max_retries
+ self.retry_backoff = retry_backoff
+
+ def scrape_chatgpt(
+ self,
+ prompts: List[str],
+ countries: List[str],
+ additional_prompts: List[str],
+ web_searches: List[bool],
+ sync: bool = True,
+ timeout: int = None
+ ) -> Dict[str, Any]:
+ """
+ Internal method to handle ChatGPT scraping API requests
+
+ Parameters:
+ - prompts: List of prompts to send to ChatGPT
+ - countries: List of country codes matching prompts
+ - additional_prompts: List of follow-up prompts matching prompts
+ - web_searches: List of web_search flags matching prompts
+ - sync: If True, uses synchronous API for immediate results
+ - timeout: Request timeout in seconds
+
+ Returns:
+ - Dict containing response with snapshot_id or direct data (if sync=True)
+ """
+ url = "https://api.brightdata.com/datasets/v3/scrape" if sync else "https://api.brightdata.com/datasets/v3/trigger"
+ try:
+ from .. import __version__
+ user_agent = f"brightdata-sdk/{__version__}"
+ except ImportError:
+ user_agent = "brightdata-sdk/unknown"
+
+ headers = {
+ "Authorization": f"Bearer {self.api_token}",
+ "Content-Type": "application/json",
+ "User-Agent": user_agent
+ }
+ params = {
+ "dataset_id": "gd_m7aof0k82r803d5bjm",
+ "include_errors": "true"
+ }
+
+ data = [
+ {
+ "url": "https://chatgpt.com/",
+ "prompt": prompts[i],
+ "country": countries[i],
+ "additional_prompt": additional_prompts[i],
+ "web_search": web_searches[i]
+ }
+ for i in range(len(prompts))
+ ]
+
+ try:
+ response = self.session.post(
+ url,
+ headers=headers,
+ params=params,
+ json=data,
+ timeout=timeout or (65 if sync else self.default_timeout)
+ )
+
+ if response.status_code == 401:
+ raise AuthenticationError("Invalid API token or insufficient permissions")
+ elif response.status_code != 200:
+ raise APIError(f"ChatGPT scraping request failed with status {response.status_code}: {response.text}")
+
+ if sync:
+ response_text = response.text
+ if '\n{' in response_text and response_text.strip().startswith('{'):
+ json_objects = []
+ for line in response_text.strip().split('\n'):
+ if line.strip():
+ try:
+ json_objects.append(json.loads(line))
+ except json.JSONDecodeError:
+ continue
+ result = json_objects
+ else:
+ try:
+ result = response.json()
+ except json.JSONDecodeError:
+ result = response_text
+
+ logger.info(f"ChatGPT data retrieved synchronously for {len(prompts)} prompt(s)")
+ print(f"Retrieved {len(result) if isinstance(result, list) else 1} ChatGPT response(s)")
+ else:
+ result = response.json()
+ snapshot_id = result.get('snapshot_id')
+ if snapshot_id:
+ logger.info(f"ChatGPT scraping job initiated successfully for {len(prompts)} prompt(s)")
+ print("")
+ print("Snapshot ID:")
+ print(snapshot_id)
+ print("")
+
+ return result
+
+ except requests.exceptions.Timeout:
+ raise APIError("Timeout while initiating ChatGPT scraping")
+ except requests.exceptions.RequestException as e:
+ raise APIError(f"Network error during ChatGPT scraping: {str(e)}")
+ except json.JSONDecodeError as e:
+ raise APIError(f"Failed to parse ChatGPT scraping response: {str(e)}")
+ except Exception as e:
+ if isinstance(e, (ValidationError, AuthenticationError, APIError)):
+ raise
+ raise APIError(f"Unexpected error during ChatGPT scraping: {str(e)}")
\ No newline at end of file
diff --git a/src/api/crawl.py b/src/api/crawl.py
new file mode 100644
index 0000000..4fe047a
--- /dev/null
+++ b/src/api/crawl.py
@@ -0,0 +1,175 @@
+import json
+from typing import Union, Dict, Any, List, Optional
+from ..utils import get_logger, validate_url
+from ..exceptions import ValidationError, APIError, AuthenticationError
+
+logger = get_logger('api.crawl')
+
+
+class CrawlAPI:
+ """Handles crawl operations using Bright Data's Web Crawl API"""
+
+ CRAWL_DATASET_ID = "gd_m6gjtfmeh43we6cqc"
+
+ AVAILABLE_OUTPUT_FIELDS = [
+ "markdown", "url", "html2text", "page_html", "ld_json",
+ "page_title", "timestamp", "input", "discovery_input",
+ "error", "error_code", "warning", "warning_code"
+ ]
+
+ def __init__(self, session, api_token, default_timeout=30, max_retries=3, retry_backoff=1.5):
+ self.session = session
+ self.api_token = api_token
+ self.default_timeout = default_timeout
+ self.max_retries = max_retries
+ self.retry_backoff = retry_backoff
+
+ def crawl(
+ self,
+ url: Union[str, List[str]],
+ ignore_sitemap: Optional[bool] = None,
+ depth: Optional[int] = None,
+ filter: Optional[str] = None,
+ exclude_filter: Optional[str] = None,
+ custom_output_fields: Optional[List[str]] = None,
+ include_errors: bool = True
+ ) -> Dict[str, Any]:
+ """
+ ## Crawl websites using Bright Data's Web Crawl API
+
+ Performs web crawling to discover and scrape multiple pages from a website
+ starting from the specified URL(s).
+
+ ### Parameters:
+ - `url` (str | List[str]): Domain URL(s) to crawl (required)
+ - `ignore_sitemap` (bool, optional): Ignore sitemap when crawling
+ - `depth` (int, optional): Maximum depth to crawl relative to the entered URL
+ - `filter` (str, optional): Regular expression to include only certain URLs (e.g. "/product/")
+ - `exclude_filter` (str, optional): Regular expression to exclude certain URLs (e.g. "/ads/")
+ - `custom_output_fields` (List[str], optional): Custom output schema fields to include
+ - `include_errors` (bool, optional): Include errors in response (default: True)
+
+ ### Returns:
+ - `Dict[str, Any]`: Crawl response with snapshot_id for tracking
+
+ ### Example Usage:
+ ```python
+ # Single URL crawl
+ result = client.crawl("https://example.com/")
+
+ # Multiple URLs with filters
+ urls = ["https://example.com/", "https://example2.com/"]
+ result = client.crawl(
+ url=urls,
+ filter="/product/",
+ exclude_filter="/ads/",
+ depth=2,
+ ignore_sitemap=True
+ )
+
+ # Custom output schema
+ result = client.crawl(
+ url="https://example.com/",
+ custom_output_fields=["markdown", "url", "page_title"]
+ )
+ ```
+
+ ### Raises:
+ - `ValidationError`: Invalid URL or parameters
+ - `AuthenticationError`: Invalid API token or insufficient permissions
+ - `APIError`: Request failed or server error
+ """
+ if isinstance(url, str):
+ urls = [url]
+ elif isinstance(url, list):
+ urls = url
+ else:
+ raise ValidationError("URL must be a string or list of strings")
+
+ if not urls:
+ raise ValidationError("At least one URL is required")
+
+ for u in urls:
+ if not isinstance(u, str) or not u.strip():
+ raise ValidationError("All URLs must be non-empty strings")
+ validate_url(u)
+
+ if custom_output_fields is not None:
+ if not isinstance(custom_output_fields, list):
+ raise ValidationError("custom_output_fields must be a list")
+
+ invalid_fields = [field for field in custom_output_fields if field not in self.AVAILABLE_OUTPUT_FIELDS]
+ if invalid_fields:
+ raise ValidationError(f"Invalid output fields: {invalid_fields}. Available fields: {self.AVAILABLE_OUTPUT_FIELDS}")
+
+ crawl_inputs = []
+ for u in urls:
+ crawl_input = {"url": u}
+
+ if ignore_sitemap is not None:
+ crawl_input["ignore_sitemap"] = ignore_sitemap
+ if depth is not None:
+ crawl_input["depth"] = depth
+ if filter is not None:
+ crawl_input["filter"] = filter
+ if exclude_filter is not None:
+ crawl_input["exclude_filter"] = exclude_filter
+
+ crawl_inputs.append(crawl_input)
+
+ api_url = "https://api.brightdata.com/datasets/v3/trigger"
+
+ params = {
+ "dataset_id": self.CRAWL_DATASET_ID,
+ "include_errors": str(include_errors).lower(),
+ "type": "discover_new",
+ "discover_by": "domain_url"
+ }
+
+ if custom_output_fields:
+ payload = {
+ "input": crawl_inputs,
+ "custom_output_fields": custom_output_fields
+ }
+ else:
+ payload = crawl_inputs
+
+ logger.info(f"Starting crawl for {len(urls)} URL(s)")
+ logger.debug(f"Crawl parameters: depth={depth}, filter={filter}, exclude_filter={exclude_filter}")
+
+ try:
+ response = self.session.post(
+ api_url,
+ params=params,
+ json=payload,
+ timeout=self.default_timeout
+ )
+
+ if response.status_code == 200:
+ result = response.json()
+ snapshot_id = result.get('snapshot_id')
+ logger.info(f"Crawl initiated successfully. Snapshot ID: {snapshot_id}")
+ return result
+
+ elif response.status_code == 401:
+ logger.error("Unauthorized (401): Check API token")
+ raise AuthenticationError(f"Unauthorized (401): Check your API token. {response.text}")
+ elif response.status_code == 403:
+ logger.error("Forbidden (403): Insufficient permissions")
+ raise AuthenticationError(f"Forbidden (403): Insufficient permissions. {response.text}")
+ elif response.status_code == 400:
+ logger.error(f"Bad request (400): {response.text}")
+ raise APIError(f"Bad request (400): {response.text}")
+ else:
+ logger.error(f"Crawl request failed ({response.status_code}): {response.text}")
+ raise APIError(
+ f"Crawl request failed ({response.status_code}): {response.text}",
+ status_code=response.status_code,
+ response_text=response.text
+ )
+
+ except Exception as e:
+ if isinstance(e, (ValidationError, AuthenticationError, APIError)):
+ raise
+ logger.error(f"Unexpected error during crawl: {e}")
+ raise APIError(f"Unexpected error during crawl: {str(e)}")
\ No newline at end of file
diff --git a/src/api/download.py b/src/api/download.py
new file mode 100644
index 0000000..4bccdc0
--- /dev/null
+++ b/src/api/download.py
@@ -0,0 +1,265 @@
+import json
+import requests
+from datetime import datetime
+from typing import Union, Dict, Any, List
+
+from ..utils import get_logger
+from ..exceptions import ValidationError, APIError, AuthenticationError
+
+logger = get_logger('api.download')
+
+
+class DownloadAPI:
+ """Handles snapshot and content download operations using Bright Data's download API"""
+
+ def __init__(self, session, api_token, default_timeout=30):
+ self.session = session
+ self.api_token = api_token
+ self.default_timeout = default_timeout
+
+ def download_content(self, content: Union[Dict, str], filename: str = None, format: str = "json", parse: bool = False) -> str:
+ """
+ ## Download content to a file based on its format
+
+ ### Args:
+ content: The content to download (dict for JSON, string for other formats)
+ filename: Optional filename. If not provided, generates one with timestamp
+ format: Format of the content ("json", "csv", "ndjson", "jsonl", "txt")
+ parse: If True, automatically parse JSON strings in 'body' fields to objects (default: False)
+
+ ### Returns:
+ Path to the downloaded file
+ """
+
+ if not filename:
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+ filename = f"brightdata_results_{timestamp}.{format}"
+
+ if not filename.endswith(f".{format}"):
+ filename = f"{filename}.{format}"
+
+ if parse and isinstance(content, (list, dict)):
+ content = self._parse_body_json(content)
+
+ try:
+ if format == "json":
+ with open(filename, 'w', encoding='utf-8') as f:
+ if isinstance(content, dict) or isinstance(content, list):
+ json.dump(content, f, indent=2, ensure_ascii=False)
+ else:
+ f.write(str(content))
+ else:
+ with open(filename, 'w', encoding='utf-8') as f:
+ f.write(str(content))
+
+ logger.info(f"Content downloaded to: {filename}")
+ return filename
+
+ except IOError as e:
+ raise APIError(f"Failed to write file {filename}: {str(e)}")
+ except Exception as e:
+ raise APIError(f"Failed to download content: {str(e)}")
+
+ def download_snapshot(
+ self,
+ snapshot_id: str,
+ format: str = "json",
+ compress: bool = False,
+ batch_size: int = None,
+ part: int = None
+ ) -> Union[Dict[str, Any], List[Dict[str, Any]], str]:
+ """
+ ## Download snapshot content from Bright Data dataset API
+
+ Downloads the snapshot content using the snapshot ID returned from search_chatGPT()
+ or other dataset collection triggers.
+
+ ### Parameters:
+ - `snapshot_id` (str): The snapshot ID returned when collection was triggered (required)
+ - `format` (str, optional): Format of the data - "json", "ndjson", "jsonl", or "csv" (default: "json")
+ - `compress` (bool, optional): Whether the result should be compressed (default: False)
+ - `batch_size` (int, optional): Divide into batches of X records (minimum: 1000)
+ - `part` (int, optional): If batch_size provided, specify which part to download
+
+ ### Returns:
+ - `Union[Dict, List, str]`: Snapshot data in the requested format
+
+ ### Example Usage:
+ ```python
+ # Download complete snapshot
+ data = client.download_snapshot("s_m4x7enmven8djfqak")
+
+ # Download as CSV format
+ csv_data = client.download_snapshot("s_m4x7enmven8djfqak", format="csv")
+
+ # Download in batches
+ batch_data = client.download_snapshot(
+ "s_m4x7enmven8djfqak",
+ batch_size=1000,
+ part=1
+ )
+ ```
+
+ ### Raises:
+ - `ValidationError`: Invalid parameters or snapshot_id format
+ - `AuthenticationError`: Invalid API token or insufficient permissions
+ - `APIError`: Request failed, snapshot not found, or server error
+ """
+ if not snapshot_id or not isinstance(snapshot_id, str):
+ raise ValidationError("Snapshot ID is required and must be a non-empty string")
+
+ if format not in ["json", "ndjson", "jsonl", "csv"]:
+ raise ValidationError("Format must be one of: json, ndjson, jsonl, csv")
+
+ if not isinstance(compress, bool):
+ raise ValidationError("Compress must be a boolean")
+
+ if batch_size is not None:
+ if not isinstance(batch_size, int) or batch_size < 1000:
+ raise ValidationError("Batch size must be an integer >= 1000")
+
+ if part is not None:
+ if not isinstance(part, int) or part < 1:
+ raise ValidationError("Part must be a positive integer")
+ if batch_size is None:
+ raise ValidationError("Part parameter requires batch_size to be specified")
+
+ url = f"https://api.brightdata.com/datasets/v3/snapshot/{snapshot_id}"
+ try:
+ from .. import __version__
+ user_agent = f"brightdata-sdk/{__version__}"
+ except ImportError:
+ user_agent = "brightdata-sdk/unknown"
+
+ headers = {
+ "Authorization": f"Bearer {self.api_token}",
+ "Accept": "application/json",
+ "User-Agent": user_agent
+ }
+ params = {
+ "format": format
+ }
+
+ if compress:
+ params["compress"] = "true"
+
+ if batch_size is not None:
+ params["batch_size"] = batch_size
+
+ if part is not None:
+ params["part"] = part
+
+ try:
+ logger.info(f"Downloading snapshot {snapshot_id} in {format} format")
+
+ response = self.session.get(
+ url,
+ headers=headers,
+ params=params,
+ timeout=self.default_timeout
+ )
+
+ if response.status_code == 200:
+ pass
+ elif response.status_code == 202:
+ try:
+ response_data = response.json()
+ message = response_data.get('message', 'Snapshot is not ready yet')
+ print("Snapshot is not ready yet, try again soon")
+ return {"status": "not_ready", "message": message, "snapshot_id": snapshot_id}
+ except json.JSONDecodeError:
+ print("Snapshot is not ready yet, try again soon")
+ return {"status": "not_ready", "message": "Snapshot is not ready yet, check again soon", "snapshot_id": snapshot_id}
+ elif response.status_code == 401:
+ raise AuthenticationError("Invalid API token or insufficient permissions")
+ elif response.status_code == 404:
+ raise APIError(f"Snapshot '{snapshot_id}' not found")
+ else:
+ raise APIError(f"Download request failed with status {response.status_code}: {response.text}")
+
+ if format == "csv":
+ data = response.text
+ save_data = data
+ else:
+ response_text = response.text
+ if '\n{' in response_text and response_text.strip().startswith('{'):
+ json_objects = []
+ for line in response_text.strip().split('\n'):
+ if line.strip():
+ try:
+ json_objects.append(json.loads(line))
+ except json.JSONDecodeError:
+ continue
+ data = json_objects
+ save_data = json_objects
+ else:
+ try:
+ data = response.json()
+ save_data = data
+ except json.JSONDecodeError:
+ data = response_text
+ save_data = response_text
+
+ try:
+ output_file = f"snapshot_{snapshot_id}.{format}"
+ if format == "csv" or isinstance(save_data, str):
+ with open(output_file, 'w', encoding='utf-8') as f:
+ f.write(str(save_data))
+ else:
+ with open(output_file, 'w', encoding='utf-8') as f:
+ json.dump(save_data, f, indent=2, ensure_ascii=False)
+ logger.info(f"Data saved to: {output_file}")
+ except Exception:
+ pass
+
+ logger.info(f"Successfully downloaded snapshot {snapshot_id}")
+ return data
+
+ except requests.exceptions.Timeout:
+ raise APIError("Timeout while downloading snapshot")
+ except requests.exceptions.RequestException as e:
+ raise APIError(f"Network error during snapshot download: {str(e)}")
+ except Exception as e:
+ if isinstance(e, (ValidationError, AuthenticationError, APIError)):
+ raise
+ raise APIError(f"Unexpected error during snapshot download: {str(e)}")
+
+ def _parse_body_json(self, content: Union[Dict, List]) -> Union[Dict, List]:
+ """
+ Parse JSON strings in 'body' fields to objects
+
+ Args:
+ content: The content to process
+
+ Returns:
+ Content with parsed body fields
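+
+ Example (illustrative input and output):
+ {'body': '{"a": 1}'} -> {'body': {'a': 1}}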
+ """
+ if content is None:
+ return content
+
+ if isinstance(content, list):
+ for item in content:
+ if isinstance(item, dict) and 'body' in item:
+ body = item['body']
+ if isinstance(body, str):
+ try:
+ item['body'] = json.loads(body)
+ except (json.JSONDecodeError, TypeError):
+ pass
+ elif isinstance(item, (dict, list)):
+ self._parse_body_json(item)
+
+ elif isinstance(content, dict):
+ if 'body' in content:
+ body = content['body']
+ if isinstance(body, str):
+ try:
+ content['body'] = json.loads(body)
+ except (json.JSONDecodeError, TypeError):
+ pass
+
+ for key, value in content.items():
+ if isinstance(value, (dict, list)):
+ content[key] = self._parse_body_json(value)
+
+ return content
\ No newline at end of file
diff --git a/src/api/extract.py b/src/api/extract.py
new file mode 100644
index 0000000..1b04b84
--- /dev/null
+++ b/src/api/extract.py
@@ -0,0 +1,419 @@
+import os
+import re
+import json
+import openai
+from typing import Dict, Any, Tuple, Union, List
+from urllib.parse import urlparse
+
+from ..utils import get_logger
+from ..exceptions import ValidationError, APIError
+
+logger = get_logger('api.extract')
+
+
+class ExtractResult(str):
+ """
+ Custom result class that behaves like a string (extracted content)
+ but also provides access to metadata attributes
+ """
+ def __new__(cls, extracted_content, metadata):
+ obj = str.__new__(cls, extracted_content)
+ obj._metadata = metadata
+ return obj
+
+ def __getattr__(self, name):
+ if name in self._metadata:
+ return self._metadata[name]
+ raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")
+
+ def __getitem__(self, key):
+ return self._metadata[key]
+
+ def get(self, key, default=None):
+ return self._metadata.get(key, default)
+
+ def keys(self):
+ return self._metadata.keys()
+
+ def values(self):
+ return self._metadata.values()
+
+ def items(self):
+ return self._metadata.items()
+
+ @property
+ def metadata(self):
+ """Access full metadata dictionary"""
+ return self._metadata
+
+
+class ExtractAPI:
+ """Handles content extraction using web scraping + LLM processing"""
+
+ def __init__(self, client):
+ self.client = client
+
+    def extract(self, query: str, url: Union[str, List[str]] = None, output_scheme: Dict[str, Any] = None, llm_key: str = None) -> 'ExtractResult':
+ """
+ ## Extract specific information from websites using AI
+
+ Combines web scraping with OpenAI's language models to extract targeted information
+ from web pages based on natural language queries.
+
+ ### Parameters:
+ - `query` (str): Natural language query describing what to extract. If `url` parameter is provided,
+ this becomes the pure extraction query. If `url` is not provided, this should include
+ the URL (e.g. "extract the most recent news from cnn.com")
+ - `url` (str | List[str], optional): Direct URL(s) to scrape. If provided, bypasses URL extraction
+ from query and sends these URLs to the web unlocker API
+ - `output_scheme` (dict, optional): JSON Schema defining the expected structure for the LLM response.
+ Uses OpenAI's Structured Outputs for reliable type-safe responses.
+            Example: {"type": "object", "properties": {"title": {"type": "string"}, "date": {"type": "string"}}, "required": ["title", "date"], "additionalProperties": False}
+ - `llm_key` (str, optional): OpenAI API key. If not provided, uses OPENAI_API_KEY env variable
+
+ ### Returns:
+        - `ExtractResult`: A string containing the extracted content, with attribute access to the result metadata
+
+ ### Example Usage:
+ ```python
+ # Using URL parameter with structured output
+ result = client.extract(
+ query="extract the most recent news headlines",
+ url="https://cnn.com",
+            output_scheme={
+                "type": "object",
+                "properties": {
+                    "headlines": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "title": {"type": "string"},
+                                "date": {"type": "string"}
+                            },
+                            "required": ["title", "date"],
+                            "additionalProperties": False
+                        }
+                    }
+                },
+                "required": ["headlines"],
+                "additionalProperties": False
+            }
+ )
+
+ # Using URL in query (original behavior)
+ result = client.extract(
+ query="extract the most recent news from cnn.com",
+ llm_key="your-openai-api-key"
+ )
+
+ # Multiple URLs with structured schema
+ result = client.extract(
+ query="extract main headlines",
+ url=["https://cnn.com", "https://bbc.com"],
+            output_scheme={
+                "type": "object",
+                "properties": {
+                    "sources": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "source_name": {"type": "string"},
+                                "headlines": {"type": "array", "items": {"type": "string"}}
+                            },
+                            "required": ["source_name", "headlines"],
+                            "additionalProperties": False
+                        }
+                    }
+                },
+                "required": ["sources"],
+                "additionalProperties": False
+            }
+ )
+ ```
+
+ ### Raises:
+ - `ValidationError`: Invalid query format or missing LLM key
+ - `APIError`: Scraping failed or LLM processing error
+ """
+ if not query or not isinstance(query, str):
+ raise ValidationError("Query must be a non-empty string")
+
+ query = query.strip()
+ if len(query) > 10000:
+ raise ValidationError("Query is too long (maximum 10,000 characters)")
+ if len(query) < 5:
+ raise ValidationError("Query is too short (minimum 5 characters)")
+
+ if not llm_key:
+ llm_key = os.getenv('OPENAI_API_KEY')
+
+ if not llm_key or not isinstance(llm_key, str):
+ raise ValidationError("OpenAI API key is required. Provide it as parameter or set OPENAI_API_KEY environment variable")
+
+ if output_scheme is not None:
+ if not isinstance(output_scheme, dict):
+ raise ValidationError("output_scheme must be a dict containing a valid JSON Schema")
+ if "type" not in output_scheme:
+ raise ValidationError("output_scheme must have a 'type' property")
+
+ self._validate_structured_outputs_schema(output_scheme)
+
+ logger.info(f"Processing extract query: {query[:50]}...")
+
+ try:
+ if url is not None:
+ parsed_query = query.strip()
+ target_urls = url if isinstance(url, list) else [url]
+ logger.info(f"Using provided URL(s): {target_urls}")
+ else:
+ parsed_query, extracted_url = self._parse_query_and_url(query)
+ target_urls = [extracted_url]
+ logger.info(f"Parsed - Query: '{parsed_query}', URL: '{extracted_url}'")
+
+ if len(target_urls) == 1:
+ scraped_content = self.client.scrape(target_urls[0], response_format="raw")
+ source_url = target_urls[0]
+ else:
+ scraped_content = self.client.scrape(target_urls, response_format="raw")
+ source_url = ', '.join(target_urls)
+
+ logger.info(f"Scraped content from {len(target_urls)} URL(s)")
+
+ if isinstance(scraped_content, list):
+ all_text = []
+ all_titles = []
+ for i, content in enumerate(scraped_content):
+ parsed = self.client.parse_content(
+ content,
+ extract_text=True,
+ extract_links=False,
+ extract_images=False
+ )
+ all_text.append(f"--- Content from {target_urls[i]} ---\n{parsed.get('text', '')}")
+ all_titles.append(parsed.get('title', 'Unknown'))
+
+ combined_text = "\n\n".join(all_text)
+ combined_title = " | ".join(all_titles)
+ parsed_content = {'text': combined_text, 'title': combined_title}
+ else:
+ parsed_content = self.client.parse_content(
+ scraped_content,
+ extract_text=True,
+ extract_links=False,
+ extract_images=False
+ )
+
+ logger.info(f"Parsed content - text length: {len(parsed_content.get('text', ''))}")
+
+ extracted_info, token_usage = self._process_with_llm(
+ parsed_query,
+ parsed_content.get('text', ''),
+ llm_key,
+ source_url,
+ output_scheme
+ )
+
+ metadata = {
+ 'query': parsed_query,
+ 'url': source_url,
+ 'extracted_content': extracted_info,
+ 'source_title': parsed_content.get('title', 'Unknown'),
+ 'content_length': len(parsed_content.get('text', '')),
+ 'token_usage': token_usage,
+ 'success': True
+ }
+
+ return ExtractResult(extracted_info, metadata)
+
+ except Exception as e:
+ if isinstance(e, (ValidationError, APIError)):
+ raise
+ logger.error(f"Unexpected error during extraction: {e}")
+ raise APIError(f"Extraction failed: {str(e)}")
+
+ def _parse_query_and_url(self, query: str) -> Tuple[str, str]:
+ """
+ Parse natural language query to extract the task and URL
+
+ Args:
+ query: Natural language query like "extract news from cnn.com"
+
+ Returns:
+ Tuple of (parsed_query, full_url)
+ """
+ query = query.strip()
+
+ url_patterns = [
+ r'from\s+((?:https?://)?(?:www\.)?[\w\.-]+(?:\.[\w]{2,})+(?:/[\w\.-]*)*)',
+ r'on\s+((?:https?://)?(?:www\.)?[\w\.-]+(?:\.[\w]{2,})+(?:/[\w\.-]*)*)',
+ r'at\s+((?:https?://)?(?:www\.)?[\w\.-]+(?:\.[\w]{2,})+(?:/[\w\.-]*)*)',
+ r'((?:https?://)?(?:www\.)?[\w\.-]+(?:\.[\w]{2,})+(?:/[\w\.-]*)*)'
+ ]
+
+ url = None
+ for pattern in url_patterns:
+ match = re.search(pattern, query, re.IGNORECASE)
+ if match:
+ url = match.group(1)
+ break
+
+ if not url:
+ raise ValidationError("Could not extract URL from query. Please include a website URL.")
+
+ full_url = self._build_full_url(url)
+
+ extract_query = re.sub(r'\b(?:from|on|at)\s+(?:https?://)?(?:www\.)?[\w\.-]+(?:\.[\w]{2,})+(?:/[\w\.-]*)*', '', query, flags=re.IGNORECASE)
+ extract_query = re.sub(r'\b(?:https?://)?(?:www\.)?[\w\.-]+(?:\.[\w]{2,})+(?:/[\w\.-]*)*', '', extract_query, flags=re.IGNORECASE)
+ extract_query = re.sub(r'\s+', ' ', extract_query).strip()
+
+ if not extract_query:
+ extract_query = "extract the main content"
+
+ return extract_query, full_url
+
+ def _build_full_url(self, url: str) -> str:
+ """
+ Build a complete URL from potentially partial URL
+
+ Args:
+ url: Potentially partial URL like "cnn.com" or "https://example.com"
+
+ Returns:
+ Complete URL with https:// and www if needed
+ """
+ url = url.strip()
+
+ if not url.startswith(('http://', 'https://')):
+ if not url.startswith('www.'):
+ url = f'www.{url}'
+ url = f'https://{url}'
+
+ parsed = urlparse(url)
+ if not parsed.netloc:
+ raise ValidationError(f"Invalid URL format: {url}")
+
+ return url
+
+ def _validate_structured_outputs_schema(self, schema: Dict[str, Any], path: str = "") -> None:
+ """
+ Validate JSON Schema for OpenAI Structured Outputs compatibility
+
+ Args:
+ schema: JSON Schema to validate
+ path: Current path in schema (for error reporting)
+ """
+ if not isinstance(schema, dict):
+ return
+
+ schema_type = schema.get("type")
+
+ if schema_type == "object":
+ if "properties" not in schema:
+ raise ValidationError(f"Object schema at '{path}' must have 'properties' defined")
+ if "required" not in schema:
+ raise ValidationError(f"Object schema at '{path}' must have 'required' array (OpenAI Structured Outputs requirement)")
+ if "additionalProperties" not in schema or schema["additionalProperties"] is not False:
+ raise ValidationError(f"Object schema at '{path}' must have 'additionalProperties': false (OpenAI Structured Outputs requirement)")
+
+ properties = set(schema["properties"].keys())
+ required = set(schema["required"])
+ if properties != required:
+ missing = properties - required
+ extra = required - properties
+ error_msg = f"OpenAI Structured Outputs requires ALL properties to be in 'required' array at '{path}'."
+ if missing:
+ error_msg += f" Missing from required: {list(missing)}"
+ if extra:
+ error_msg += f" Extra in required: {list(extra)}"
+ raise ValidationError(error_msg)
+
+ for prop_name, prop_schema in schema["properties"].items():
+ self._validate_structured_outputs_schema(prop_schema, f"{path}.{prop_name}")
+
+ elif schema_type == "array":
+ if "items" in schema:
+ self._validate_structured_outputs_schema(schema["items"], f"{path}[]")
+
+ def _process_with_llm(self, query: str, content: str, llm_key: str, source_url: str, output_scheme: Dict[str, Any] = None) -> Tuple[str, Dict[str, int]]:
+ """
+ Process scraped content with OpenAI to extract requested information
+
+ Args:
+ query: What to extract from the content
+ content: Scraped and parsed text content
+ llm_key: OpenAI API key
+ source_url: Source URL for context
+ output_scheme: JSON Schema dict for structured outputs (optional)
+
+ Returns:
+ Tuple of (extracted information, token usage dict)
+ """
+ if len(content) > 15000:
+ beginning = content[:8000]
+ end = content[-4000:]
+ content = f"{beginning}\n\n... [middle content truncated for token efficiency] ...\n\n{end}"
+ elif len(content) > 12000:
+ content = content[:12000] + "\n\n... [content truncated to optimize tokens]"
+
+ client = openai.OpenAI(api_key=llm_key)
+
+ system_prompt = f"""You are a precise web content extraction specialist. Your task: {query}
+
+SOURCE: {source_url}
+
+INSTRUCTIONS:
+1. Extract ONLY the specific information requested
+2. Include relevant details (dates, numbers, names) when available
+3. If requested info isn't found, briefly state what content IS available
+4. Keep response concise but complete
+5. Be accurate and factual"""
+
+ user_prompt = f"CONTENT TO ANALYZE:\n\n{content}\n\nEXTRACT: {query}"
+
+ try:
+ call_params = {
+ "model": "gpt-4o-2024-08-06",
+ "messages": [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": user_prompt}
+ ],
+ "max_tokens": 1000,
+ "temperature": 0.1
+ }
+
+ if output_scheme:
+ call_params["response_format"] = {
+ "type": "json_schema",
+ "json_schema": {
+ "name": "extracted_content",
+ "strict": True,
+ "schema": output_scheme
+ }
+ }
+ logger.info("Using OpenAI Structured Outputs with provided schema")
+ else:
+ logger.info("Using regular OpenAI completion (no structured schema provided)")
+
+ response = client.chat.completions.create(**call_params)
+
+ if not response.choices or not response.choices[0].message.content:
+ raise APIError("OpenAI returned empty response")
+
+ extracted_content = response.choices[0].message.content.strip()
+
+ if output_scheme:
+ logger.info("Received structured JSON response from OpenAI")
+ else:
+ logger.info("Received text response from OpenAI")
+
+ token_usage = {
+ 'prompt_tokens': response.usage.prompt_tokens,
+ 'completion_tokens': response.usage.completion_tokens,
+ 'total_tokens': response.usage.total_tokens
+ }
+
+ logger.info(f"OpenAI token usage: {token_usage['total_tokens']} total ({token_usage['prompt_tokens']} prompt + {token_usage['completion_tokens']} completion)")
+
+ return extracted_content, token_usage
+
+ except Exception as e:
+ logger.error(f"OpenAI API error: {e}")
+ raise APIError(f"Failed to process content with LLM: {str(e)}")
\ No newline at end of file
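To make the ExtractResult behaviour concrete: it is a plain str subclass that carries the metadata dict assembled in extract(), so the same object can be printed as text or queried like a dict. A small illustration with made-up metadata values, assuming ExtractResult from this module is importable:

```python
meta = {
    "url": "https://example.com",
    "source_title": "Example Domain",
    "token_usage": {"total_tokens": 123},
    "success": True,
}
result = ExtractResult("Example headline", meta)

print(str(result))              # the extracted content itself
print(result.url)               # attribute access is routed to the metadata dict
print(result["token_usage"])    # dict-style access works too
print(result.get("missing", "n/a"))
```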
diff --git a/src/api/linkedin.py b/src/api/linkedin.py
new file mode 100644
index 0000000..19ede6b
--- /dev/null
+++ b/src/api/linkedin.py
@@ -0,0 +1,803 @@
+import json
+import re
+import requests
+from typing import Union, Dict, Any, List
+
+from ..utils import get_logger
+from ..exceptions import ValidationError, APIError, AuthenticationError
+
+logger = get_logger('api.linkedin')
+
+
+class LinkedInAPI:
+ """Handles LinkedIn data collection using Bright Data's collect API"""
+
+ DATASET_IDS = {
+ 'profile': 'gd_l1viktl72bvl7bjuj0',
+ 'company': 'gd_l1vikfnt1wgvvqz95w',
+ 'job': 'gd_lpfll7v5hcqtkxl6l',
+ 'post': 'gd_lyy3tktm25m4avu764'
+ }
+
+ URL_PATTERNS = {
+ 'profile': re.compile(r'linkedin\.com/in/[^/?]+/?(\?.*)?$'),
+ 'company': re.compile(r'linkedin\.com/(company|organization-guest/company)/[^/?]+/?(\?.*)?$'),
+ 'job': re.compile(r'linkedin\.com/jobs/view/[^/?]+/?(\?.*)?$'),
+ 'post': re.compile(r'linkedin\.com/(posts|pulse)/[^/?]+/?(\?.*)?$')
+ }
+
+ def __init__(self, session, api_token, default_timeout=30, max_retries=3, retry_backoff=1.5):
+ self.session = session
+ self.api_token = api_token
+ self.default_timeout = default_timeout
+ self.max_retries = max_retries
+ self.retry_backoff = retry_backoff
+
+ def _identify_dataset_type(self, url: str) -> str:
+ """
+ Identify LinkedIn dataset type based on URL pattern
+
+ Args:
+ url: LinkedIn URL to analyze
+
+ Returns:
+ Dataset type ('profile', 'company', 'job', 'post')
+
+ Raises:
+ ValidationError: If URL doesn't match any known LinkedIn pattern
+ """
+ if not url or not isinstance(url, str):
+ raise ValidationError("URL must be a non-empty string")
+
+ url = url.strip().lower()
+ for dataset_type, pattern in self.URL_PATTERNS.items():
+ if pattern.search(url):
+ logger.debug(f"URL '{url}' identified as LinkedIn {dataset_type}")
+ return dataset_type
+
+ raise ValidationError(f"URL '{url}' does not match any supported LinkedIn data type")
+
+ def _scrape_linkedin_dataset(
+ self,
+ urls: Union[str, List[str]],
+ dataset_id: str,
+ dataset_type: str,
+ sync: bool = True,
+ timeout: int = None
+ ) -> Dict[str, Any]:
+ """
+ Internal method to scrape LinkedIn data using Bright Data's collect API
+
+ Args:
+ urls: Single LinkedIn URL or list of LinkedIn URLs
+ dataset_id: Bright Data dataset ID for the specific LinkedIn data type
+ dataset_type: Type of LinkedIn data (for logging purposes)
+ sync: If True (default), uses synchronous API for immediate results
+ timeout: Request timeout in seconds
+
+ Returns:
+ Dict containing response with snapshot_id or direct data (if sync=True)
+
+ Raises:
+ ValidationError: Invalid URL format
+ AuthenticationError: Invalid API token or insufficient permissions
+ APIError: Request failed or server error
+ """
+ if isinstance(urls, str):
+ url_list = [urls]
+ else:
+ url_list = urls
+
+        if not url_list:
+ raise ValidationError("At least one URL is required")
+ for url in url_list:
+ if not url or not isinstance(url, str):
+ raise ValidationError("All URLs must be non-empty strings")
+
+ logger.info(f"Processing {len(url_list)} LinkedIn {dataset_type} URL(s) {'synchronously' if sync else 'asynchronously'}")
+
+ try:
+ from .. import __version__
+ user_agent = f"brightdata-sdk/{__version__}"
+ except ImportError:
+ user_agent = "brightdata-sdk/unknown"
+
+ headers = {
+ "Authorization": f"Bearer {self.api_token}",
+ "Content-Type": "application/json",
+ "User-Agent": user_agent
+ }
+
+ if sync:
+ api_url = "https://api.brightdata.com/datasets/v3/scrape"
+ data = {
+ "input": [{"url": url} for url in url_list]
+ }
+ params = {
+ "dataset_id": dataset_id,
+ "notify": "false",
+ "include_errors": "true"
+ }
+ else:
+ api_url = "https://api.brightdata.com/datasets/v3/trigger"
+ data = [{"url": url} for url in url_list]
+ params = {
+ "dataset_id": dataset_id,
+ "include_errors": "true"
+ }
+
+ try:
+ if sync:
+ response = self.session.post(
+ api_url,
+ headers=headers,
+ params=params,
+ json=data,
+ timeout=timeout or 65
+ )
+ else:
+ response = self.session.post(
+ api_url,
+ headers=headers,
+ params=params,
+ json=data,
+ timeout=timeout or self.default_timeout
+ )
+
+ if response.status_code == 401:
+ raise AuthenticationError("Invalid API token or insufficient permissions")
+ elif response.status_code not in [200, 202]:
+ raise APIError(f"LinkedIn data collection request failed with status {response.status_code}: {response.text}")
+
+ if sync:
+ response_text = response.text
+ if '\n{' in response_text and response_text.strip().startswith('{'):
+ json_objects = []
+ for line in response_text.strip().split('\n'):
+ if line.strip():
+ try:
+ json_objects.append(json.loads(line))
+ except json.JSONDecodeError:
+ continue
+ result = json_objects
+ else:
+ try:
+ result = response.json()
+ except json.JSONDecodeError:
+ result = response_text
+
+ logger.info(f"LinkedIn {dataset_type} data retrieved synchronously for {len(url_list)} URL(s)")
+ print(f"Retrieved {len(result) if isinstance(result, list) else 1} LinkedIn {dataset_type} record(s)")
+ else:
+ result = response.json()
+ snapshot_id = result.get('snapshot_id')
+ if snapshot_id:
+ logger.info(f"LinkedIn {dataset_type} data collection job initiated successfully for {len(url_list)} URL(s)")
+ print("")
+ print("Snapshot ID:")
+ print(snapshot_id)
+ print("")
+
+ return result
+
+ except requests.exceptions.Timeout:
+ raise APIError("Timeout while initiating LinkedIn data collection")
+ except requests.exceptions.RequestException as e:
+ raise APIError(f"Network error during LinkedIn data collection: {str(e)}")
+ except json.JSONDecodeError as e:
+ raise APIError(f"Failed to parse LinkedIn data collection response: {str(e)}")
+ except Exception as e:
+ if isinstance(e, (ValidationError, AuthenticationError, APIError)):
+ raise
+ raise APIError(f"Unexpected error during LinkedIn data collection: {str(e)}")
+
+
+class LinkedInScraper:
+ """LinkedIn data scraping interface with specialized methods for different data types"""
+
+ def __init__(self, linkedin_api):
+ self.linkedin_api = linkedin_api
+
+ def profiles(self, url: Union[str, List[str]], sync: bool = True, timeout: int = None) -> Dict[str, Any]:
+ """
+ ## Scrape LinkedIn Profile Data
+
+ Scrapes structured data from LinkedIn profiles using the profiles dataset.
+
+ ### Parameters:
+ - `url` (str | List[str]): Single LinkedIn profile URL or list of profile URLs
+ - `sync` (bool, optional): If True (default), returns data immediately. If False, returns snapshot_id for async processing
+ - `timeout` (int, optional): Request timeout in seconds (default: 65 for sync, 30 for async)
+
+ ### Returns:
+ - `Dict[str, Any]`: If sync=True, returns scraped profile data directly. If sync=False, returns response with snapshot_id for async processing
+
+ ### Example URLs:
+ - `https://www.linkedin.com/in/username/`
+ - `https://linkedin.com/in/first-last-123456/`
+
+ ### Example Usage:
+ ```python
+ # Single profile (synchronous - returns data immediately)
+ result = client.scrape_linkedin.profiles("https://www.linkedin.com/in/elad-moshe-05a90413/")
+
+ # Multiple profiles (synchronous - returns data immediately)
+ profiles = [
+ "https://www.linkedin.com/in/user1/",
+ "https://www.linkedin.com/in/user2/"
+ ]
+ result = client.scrape_linkedin.profiles(profiles)
+
+ # Asynchronous processing (returns snapshot_id)
+ result = client.scrape_linkedin.profiles(profiles, sync=False)
+ ```
+ """
+ return self.linkedin_api._scrape_linkedin_dataset(
+ url,
+ self.linkedin_api.DATASET_IDS['profile'],
+ 'profile',
+ sync,
+ timeout
+ )
+
+ def companies(self, url: Union[str, List[str]], sync: bool = True, timeout: int = None) -> Dict[str, Any]:
+ """
+ ## Scrape LinkedIn Company Data
+
+ Scrapes structured data from LinkedIn company pages using the companies dataset.
+
+ ### Parameters:
+ - `url` (str | List[str]): Single LinkedIn company URL or list of company URLs
+ - `sync` (bool, optional): If True (default), returns data immediately. If False, returns snapshot_id for async processing
+ - `timeout` (int, optional): Request timeout in seconds (default: 65 for sync, 30 for async)
+
+ ### Returns:
+ - `Dict[str, Any]`: If sync=True, returns scraped company data directly. If sync=False, returns response with snapshot_id for async processing
+
+ ### Example URLs:
+ - `https://www.linkedin.com/company/company-name/`
+ - `https://linkedin.com/company/bright-data/`
+
+ ### Example Usage:
+ ```python
+ # Single company (synchronous)
+ result = client.scrape_linkedin.companies("https://www.linkedin.com/company/bright-data/")
+
+ # Multiple companies (synchronous)
+ companies = [
+ "https://www.linkedin.com/company/ibm/",
+ "https://www.linkedin.com/company/microsoft/"
+ ]
+ result = client.scrape_linkedin.companies(companies)
+
+ # Asynchronous processing
+ result = client.scrape_linkedin.companies(companies, sync=False)
+ ```
+ """
+ return self.linkedin_api._scrape_linkedin_dataset(
+ url,
+ self.linkedin_api.DATASET_IDS['company'],
+ 'company',
+ sync,
+ timeout
+ )
+
+ def jobs(self, url: Union[str, List[str]], sync: bool = True, timeout: int = None) -> Dict[str, Any]:
+ """
+ ## Scrape LinkedIn Job Data
+
+ Scrapes structured data from LinkedIn job listings using the jobs dataset.
+
+ ### Parameters:
+ - `url` (str | List[str]): Single LinkedIn job URL or list of job URLs
+ - `sync` (bool, optional): If True (default), returns data immediately. If False, returns snapshot_id for async processing
+ - `timeout` (int, optional): Request timeout in seconds (default: 65 for sync, 30 for async)
+
+ ### Returns:
+ - `Dict[str, Any]`: If sync=True, returns scraped job data directly. If sync=False, returns response with snapshot_id for async processing
+
+ ### Example URLs:
+ - `https://www.linkedin.com/jobs/view/1234567890/`
+ - `https://linkedin.com/jobs/view/job-id/`
+
+ ### Example Usage:
+ ```python
+ # Single job listing (synchronous)
+ result = client.scrape_linkedin.jobs("https://www.linkedin.com/jobs/view/1234567890/")
+
+ # Multiple job listings (synchronous)
+ jobs = [
+ "https://www.linkedin.com/jobs/view/1111111/",
+ "https://www.linkedin.com/jobs/view/2222222/"
+ ]
+ result = client.scrape_linkedin.jobs(jobs)
+
+ # Asynchronous processing
+ result = client.scrape_linkedin.jobs(jobs, sync=False)
+ ```
+ """
+ return self.linkedin_api._scrape_linkedin_dataset(
+ url,
+ self.linkedin_api.DATASET_IDS['job'],
+ 'job',
+ sync,
+ timeout
+ )
+
+ def posts(self, url: Union[str, List[str]], sync: bool = True, timeout: int = None) -> Dict[str, Any]:
+ """
+ ## Scrape LinkedIn Post Data
+
+ Scrapes structured data from LinkedIn posts and articles using the posts dataset.
+
+ ### Parameters:
+ - `url` (str | List[str]): Single LinkedIn post URL or list of post URLs
+ - `sync` (bool, optional): If True (default), returns data immediately. If False, returns snapshot_id for async processing
+ - `timeout` (int, optional): Request timeout in seconds (default: 65 for sync, 30 for async)
+
+ ### Returns:
+ - `Dict[str, Any]`: If sync=True, returns scraped post data directly. If sync=False, returns response with snapshot_id for async processing
+
+ ### Example URLs:
+ - `https://www.linkedin.com/posts/username-activity-123456/`
+ - `https://www.linkedin.com/pulse/article-title-author/`
+
+ ### Example Usage:
+ ```python
+ # Single post (synchronous)
+ result = client.scrape_linkedin.posts("https://www.linkedin.com/posts/user-activity-123/")
+
+ # Multiple posts (synchronous)
+ posts = [
+ "https://www.linkedin.com/posts/user1-activity-111/",
+ "https://www.linkedin.com/pulse/article-author/"
+ ]
+ result = client.scrape_linkedin.posts(posts)
+
+ # Asynchronous processing
+ result = client.scrape_linkedin.posts(posts, sync=False)
+ ```
+ """
+ return self.linkedin_api._scrape_linkedin_dataset(
+ url,
+ self.linkedin_api.DATASET_IDS['post'],
+ 'post',
+ sync,
+ timeout
+ )
+
+
+class LinkedInSearcher:
+ """LinkedIn search interface for discovering new LinkedIn data by various criteria"""
+
+ def __init__(self, linkedin_api):
+ self.linkedin_api = linkedin_api
+
+ def profiles(
+ self,
+ first_name: Union[str, List[str]],
+ last_name: Union[str, List[str]],
+ timeout: int = None
+ ) -> Dict[str, Any]:
+ """
+ ## Search LinkedIn Profiles by Name
+
+ Discovers LinkedIn profiles by searching for first and last names.
+
+ ### Parameters:
+ - `first_name` (str | List[str]): Single first name or list of first names to search for
+ - `last_name` (str | List[str]): Single last name or list of last names to search for
+ - `timeout` (int, optional): Request timeout in seconds (default: 30)
+
+ ### Returns:
+ - `Dict[str, Any]`: Response containing snapshot_id for async processing
+
+ ### Example Usage:
+ ```python
+ # Single name search (returns snapshot_id)
+ result = client.search_linkedin.profiles("James", "Smith")
+
+ # Multiple names search (returns snapshot_id)
+ first_names = ["James", "Idan"]
+ last_names = ["Smith", "Vilenski"]
+ result = client.search_linkedin.profiles(first_names, last_names)
+ ```
+ """
+ if isinstance(first_name, str):
+ first_names = [first_name]
+ else:
+ first_names = first_name
+
+ if isinstance(last_name, str):
+ last_names = [last_name]
+ else:
+ last_names = last_name
+
+ if len(first_names) != len(last_names):
+ raise ValidationError("first_name and last_name must have the same length")
+
+ api_url = "https://api.brightdata.com/datasets/v3/trigger"
+
+ try:
+ from .. import __version__
+ user_agent = f"brightdata-sdk/{__version__}"
+ except ImportError:
+ user_agent = "brightdata-sdk/unknown"
+
+ headers = {
+ "Authorization": f"Bearer {self.linkedin_api.api_token}",
+ "Content-Type": "application/json",
+ "User-Agent": user_agent
+ }
+ params = {
+ "dataset_id": self.linkedin_api.DATASET_IDS['profile'],
+ "include_errors": "true",
+ "type": "discover_new",
+ "discover_by": "name"
+ }
+
+ data = [
+ {
+ "first_name": first_names[i],
+ "last_name": last_names[i]
+ }
+ for i in range(len(first_names))
+ ]
+
+ return self._make_request(api_url, headers, params, data, 'profile search', len(data), timeout)
+
+ def jobs(
+ self,
+ url: Union[str, List[str]] = None,
+ location: Union[str, List[str]] = None,
+ keyword: Union[str, List[str]] = "",
+ country: Union[str, List[str]] = "",
+ time_range: Union[str, List[str]] = "",
+ job_type: Union[str, List[str]] = "",
+ experience_level: Union[str, List[str]] = "",
+ remote: Union[str, List[str]] = "",
+ company: Union[str, List[str]] = "",
+ location_radius: Union[str, List[str]] = "",
+ selective_search: Union[bool, List[bool]] = False,
+ timeout: int = None
+ ) -> Dict[str, Any]:
+ """
+ ## Search LinkedIn Jobs by URL or Keywords
+
+ Discovers LinkedIn jobs either by searching specific job search URLs or by keyword criteria.
+
+ ### Parameters:
+ - `url` (str | List[str], optional): LinkedIn job search URLs to scrape
+ - `location` (str | List[str], optional): Job location(s) - required when searching by keyword
+ - `keyword` (str | List[str], optional): Job keyword(s) to search for (default: "")
+ - `country` (str | List[str], optional): Country code(s) (default: "")
+ - `time_range` (str | List[str], optional): Time range filter (default: "")
+ - `job_type` (str | List[str], optional): Job type filter (default: "")
+ - `experience_level` (str | List[str], optional): Experience level filter (default: "")
+ - `remote` (str | List[str], optional): Remote work filter (default: "")
+ - `company` (str | List[str], optional): Company name filter (default: "")
+ - `location_radius` (str | List[str], optional): Location radius filter (default: "")
+ - `selective_search` (bool | List[bool], optional): Enable selective search (default: False)
+ - `timeout` (int, optional): Request timeout in seconds (default: 30)
+
+ ### Returns:
+ - `Dict[str, Any]`: Response containing snapshot_id for async processing
+
+ ### Example Usage:
+ ```python
+ # Search by job URLs (returns snapshot_id)
+ job_urls = [
+ "https://www.linkedin.com/jobs/search?keywords=Software&location=Tel%20Aviv-Yafo",
+ "https://www.linkedin.com/jobs/reddit-inc.-jobs-worldwide?f_C=150573"
+ ]
+ result = client.search_linkedin.jobs(url=job_urls)
+
+ # Search by keyword (returns snapshot_id)
+ result = client.search_linkedin.jobs(
+ location="Paris",
+ keyword="product manager",
+ country="FR",
+ time_range="Past month",
+ job_type="Full-time"
+ )
+ ```
+ """
+ if url is not None:
+ return self._search_jobs_by_url(url, timeout)
+ elif location is not None:
+ return self._search_jobs_by_keyword(
+ location, keyword, country, time_range, job_type,
+ experience_level, remote, company, location_radius,
+ selective_search, timeout
+ )
+ else:
+ raise ValidationError("Either 'url' or 'location' parameter must be provided")
+
+ def posts(
+ self,
+ profile_url: Union[str, List[str]] = None,
+ company_url: Union[str, List[str]] = None,
+ url: Union[str, List[str]] = None,
+ start_date: Union[str, List[str]] = "",
+ end_date: Union[str, List[str]] = "",
+ timeout: int = None
+ ) -> Dict[str, Any]:
+ """
+ ## Search LinkedIn Posts by Profile, Company, or General URL
+
+ Discovers LinkedIn posts using various search methods.
+
+ ### Parameters:
+ - `profile_url` (str | List[str], optional): LinkedIn profile URL(s) to get posts from
+ - `company_url` (str | List[str], optional): LinkedIn company URL(s) to get posts from
+ - `url` (str | List[str], optional): General LinkedIn URL(s) for posts
+ - `start_date` (str | List[str], optional): Start date filter (ISO format, default: "")
+ - `end_date` (str | List[str], optional): End date filter (ISO format, default: "")
+ - `timeout` (int, optional): Request timeout in seconds (default: 30)
+
+ ### Returns:
+ - `Dict[str, Any]`: Response containing snapshot_id for async processing
+
+ ### Example Usage:
+ ```python
+ # Search posts by profile URL with date range (returns snapshot_id)
+ result = client.search_linkedin.posts(
+ profile_url="https://www.linkedin.com/in/bettywliu",
+ start_date="2018-04-25T00:00:00.000Z",
+ end_date="2021-05-25T00:00:00.000Z"
+ )
+
+ # Search posts by company URL (returns snapshot_id)
+ result = client.search_linkedin.posts(
+ company_url="https://www.linkedin.com/company/bright-data"
+ )
+
+ # Search posts by general URL (returns snapshot_id)
+ result = client.search_linkedin.posts(
+ url="https://www.linkedin.com/posts/activity-123456"
+ )
+ ```
+ """
+ if profile_url is not None:
+ return self._search_posts_by_profile(profile_url, start_date, end_date, timeout)
+ elif company_url is not None:
+ return self._search_posts_by_company(company_url, timeout)
+ elif url is not None:
+ return self._search_posts_by_url(url, timeout)
+ else:
+ raise ValidationError("One of 'profile_url', 'company_url', or 'url' parameter must be provided")
+
+ def _search_jobs_by_url(self, urls, timeout):
+ """Search jobs by LinkedIn job search URLs"""
+ if isinstance(urls, str):
+ url_list = [urls]
+ else:
+ url_list = urls
+
+ api_url = "https://api.brightdata.com/datasets/v3/trigger"
+
+ try:
+ from .. import __version__
+ user_agent = f"brightdata-sdk/{__version__}"
+ except ImportError:
+ user_agent = "brightdata-sdk/unknown"
+
+ headers = {
+ "Authorization": f"Bearer {self.linkedin_api.api_token}",
+ "Content-Type": "application/json",
+ "User-Agent": user_agent
+ }
+ params = {
+ "dataset_id": self.linkedin_api.DATASET_IDS['job'],
+ "include_errors": "true",
+ "type": "discover_new",
+ "discover_by": "url"
+ }
+
+ data = [{"url": url} for url in url_list]
+ return self._make_request(api_url, headers, params, data, 'job search by URL', len(data), timeout)
+
+ def _search_jobs_by_keyword(self, location, keyword, country, time_range, job_type, experience_level, remote, company, location_radius, selective_search, timeout):
+ """Search jobs by keyword criteria"""
+ params_dict = {
+ 'location': location, 'keyword': keyword, 'country': country,
+ 'time_range': time_range, 'job_type': job_type, 'experience_level': experience_level,
+ 'remote': remote, 'company': company, 'location_radius': location_radius,
+ 'selective_search': selective_search
+ }
+
+ max_length = 1
+ for key, value in params_dict.items():
+ if isinstance(value, list):
+ max_length = max(max_length, len(value))
+ normalized_params = {}
+ for key, value in params_dict.items():
+ if isinstance(value, list):
+ if len(value) != max_length and len(value) != 1:
+ raise ValidationError(f"Parameter '{key}' list length must be 1 or {max_length}")
+ normalized_params[key] = value * max_length if len(value) == 1 else value
+ else:
+ normalized_params[key] = [value] * max_length
+
+ api_url = "https://api.brightdata.com/datasets/v3/trigger"
+
+ try:
+ from .. import __version__
+ user_agent = f"brightdata-sdk/{__version__}"
+ except ImportError:
+ user_agent = "brightdata-sdk/unknown"
+
+ headers = {
+ "Authorization": f"Bearer {self.linkedin_api.api_token}",
+ "Content-Type": "application/json",
+ "User-Agent": user_agent
+ }
+ params = {
+ "dataset_id": self.linkedin_api.DATASET_IDS['job'],
+ "include_errors": "true",
+ "type": "discover_new",
+ "discover_by": "keyword"
+ }
+
+ data = []
+ for i in range(max_length):
+ data.append({
+ "location": normalized_params['location'][i],
+ "keyword": normalized_params['keyword'][i],
+ "country": normalized_params['country'][i],
+ "time_range": normalized_params['time_range'][i],
+ "job_type": normalized_params['job_type'][i],
+ "experience_level": normalized_params['experience_level'][i],
+ "remote": normalized_params['remote'][i],
+ "company": normalized_params['company'][i],
+ "location_radius": normalized_params['location_radius'][i],
+ "selective_search": normalized_params['selective_search'][i]
+ })
+
+ return self._make_request(api_url, headers, params, data, 'job search by keyword', len(data), timeout)
+
+ def _search_posts_by_profile(self, profile_urls, start_dates, end_dates, timeout):
+ """Search posts by profile URL with optional date filtering"""
+ if isinstance(profile_urls, str):
+ url_list = [profile_urls]
+ else:
+ url_list = profile_urls
+
+        if isinstance(start_dates, str):
+            start_list = [start_dates] * len(url_list)
+        elif len(start_dates) == len(url_list):
+            start_list = start_dates
+        else:
+            # Fall back to the first value (or an empty string) when lengths differ
+            start_list = [start_dates[0] if start_dates else ""] * len(url_list)
+
+        if isinstance(end_dates, str):
+            end_list = [end_dates] * len(url_list)
+        elif len(end_dates) == len(url_list):
+            end_list = end_dates
+        else:
+            end_list = [end_dates[0] if end_dates else ""] * len(url_list)
+
+ api_url = "https://api.brightdata.com/datasets/v3/trigger"
+
+ try:
+ from .. import __version__
+ user_agent = f"brightdata-sdk/{__version__}"
+ except ImportError:
+ user_agent = "brightdata-sdk/unknown"
+
+ headers = {
+ "Authorization": f"Bearer {self.linkedin_api.api_token}",
+ "Content-Type": "application/json",
+ "User-Agent": user_agent
+ }
+ params = {
+ "dataset_id": self.linkedin_api.DATASET_IDS['post'],
+ "include_errors": "true",
+ "type": "discover_new",
+ "discover_by": "profile_url"
+ }
+
+ data = []
+ for i in range(len(url_list)):
+ item = {"url": url_list[i]}
+ if start_list[i]:
+ item["start_date"] = start_list[i]
+ if end_list[i]:
+ item["end_date"] = end_list[i]
+ data.append(item)
+
+ return self._make_request(api_url, headers, params, data, 'post search by profile', len(data), timeout)
+
+ def _search_posts_by_company(self, company_urls, timeout):
+ """Search posts by company URL"""
+ if isinstance(company_urls, str):
+ url_list = [company_urls]
+ else:
+ url_list = company_urls
+
+ api_url = "https://api.brightdata.com/datasets/v3/trigger"
+
+ try:
+ from .. import __version__
+ user_agent = f"brightdata-sdk/{__version__}"
+ except ImportError:
+ user_agent = "brightdata-sdk/unknown"
+
+ headers = {
+ "Authorization": f"Bearer {self.linkedin_api.api_token}",
+ "Content-Type": "application/json",
+ "User-Agent": user_agent
+ }
+ params = {
+ "dataset_id": self.linkedin_api.DATASET_IDS['post'],
+ "include_errors": "true",
+ "type": "discover_new",
+ "discover_by": "company_url"
+ }
+
+ data = [{"url": url} for url in url_list]
+ return self._make_request(api_url, headers, params, data, 'post search by company', len(data), timeout)
+
+ def _search_posts_by_url(self, urls, timeout):
+ """Search posts by general URL"""
+ if isinstance(urls, str):
+ url_list = [urls]
+ else:
+ url_list = urls
+
+ api_url = "https://api.brightdata.com/datasets/v3/trigger"
+
+ try:
+ from .. import __version__
+ user_agent = f"brightdata-sdk/{__version__}"
+ except ImportError:
+ user_agent = "brightdata-sdk/unknown"
+
+ headers = {
+ "Authorization": f"Bearer {self.linkedin_api.api_token}",
+ "Content-Type": "application/json",
+ "User-Agent": user_agent
+ }
+ params = {
+ "dataset_id": self.linkedin_api.DATASET_IDS['post'],
+ "include_errors": "true",
+ "type": "discover_new",
+ "discover_by": "url"
+ }
+
+ data = [{"url": url} for url in url_list]
+ return self._make_request(api_url, headers, params, data, 'post search by URL', len(data), timeout)
+
+ def _make_request(self, api_url, headers, params, data, operation_type, count, timeout):
+ """Common method to make API requests (async only for search operations)"""
+ try:
+ response = self.linkedin_api.session.post(
+ api_url,
+ headers=headers,
+ params=params,
+ json=data,
+ timeout=timeout or self.linkedin_api.default_timeout
+ )
+
+ if response.status_code == 401:
+ raise AuthenticationError("Invalid API token or insufficient permissions")
+ elif response.status_code != 200:
+ raise APIError(f"LinkedIn {operation_type} request failed with status {response.status_code}: {response.text}")
+
+ result = response.json()
+ snapshot_id = result.get('snapshot_id')
+ if snapshot_id:
+ logger.info(f"LinkedIn {operation_type} job initiated successfully for {count} item(s)")
+ print("")
+ print("Snapshot ID:")
+ print(snapshot_id)
+ print("")
+
+ return result
+
+ except requests.exceptions.Timeout:
+ raise APIError(f"Timeout while initiating LinkedIn {operation_type}")
+ except requests.exceptions.RequestException as e:
+ raise APIError(f"Network error during LinkedIn {operation_type}: {str(e)}")
+ except json.JSONDecodeError as e:
+ raise APIError(f"Failed to parse LinkedIn {operation_type} response: {str(e)}")
+ except Exception as e:
+ if isinstance(e, (ValidationError, AuthenticationError, APIError)):
+ raise
+ raise APIError(f"Unexpected error during LinkedIn {operation_type}: {str(e)}")
\ No newline at end of file
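The search methods above are asynchronous: they return a snapshot_id, and the records are fetched later with download_snapshot(), which (per the snapshot download code earlier in this patch) returns {"status": "not_ready", ...} while the dataset job is still running. A minimal polling sketch, assuming a configured bdclient instance named client:

```python
import time

def wait_for_snapshot(client, snapshot_id, poll_every=15, max_wait=600):
    """Poll client.download_snapshot() until the dataset job is finished (sketch)."""
    waited = 0
    while waited < max_wait:
        result = client.download_snapshot(snapshot_id)
        ready = not (isinstance(result, dict) and result.get("status") == "not_ready")
        if ready:
            return result            # list/dict of records, or raw text
        time.sleep(poll_every)
        waited += poll_every
    raise TimeoutError(f"Snapshot {snapshot_id} not ready after {max_wait}s")

# Hypothetical usage:
# job = client.search_linkedin.profiles("James", "Smith")
# data = wait_for_snapshot(client, job["snapshot_id"])
```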
diff --git a/src/api/scraper.py b/src/api/scraper.py
new file mode 100644
index 0000000..0d4fc31
--- /dev/null
+++ b/src/api/scraper.py
@@ -0,0 +1,205 @@
+import time
+from typing import Union, Dict, Any, List
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+from ..utils import (
+ validate_url, validate_zone_name, validate_country_code,
+ validate_timeout, validate_max_workers, validate_url_list,
+ validate_response_format, validate_http_method, retry_request,
+ get_logger, log_request, safe_json_parse, validate_response_size
+)
+from ..exceptions import ValidationError, APIError, AuthenticationError
+
+logger = get_logger('api.scraper')
+
+
+class WebScraper:
+ """Handles web scraping operations using Bright Data Web Unlocker API"""
+
+ def __init__(self, session, default_timeout=30, max_retries=3, retry_backoff=1.5):
+ self.session = session
+ self.default_timeout = default_timeout
+ self.max_retries = max_retries
+ self.retry_backoff = retry_backoff
+
+ def scrape(
+ self,
+ url: Union[str, List[str]],
+ zone: str,
+ response_format: str = "raw",
+ method: str = "GET",
+ country: str = "",
+ data_format: str = "markdown",
+ async_request: bool = False,
+ max_workers: int = 10,
+ timeout: int = None
+ ) -> Union[Dict[str, Any], str, List[Union[Dict[str, Any], str]]]:
+ """
+ **Unlock and scrape websites using Bright Data Web Unlocker API**
+
+ Scrapes one or multiple URLs through Bright Data's proxy network with anti-bot detection bypass.
+
+ **Parameters:**
+ - `url` (str | List[str]): Single URL string or list of URLs to scrape
+ - `zone` (str): Your Bright Data zone identifier
+ - `response_format` (str, optional): Response format - `"json"` for structured data, `"raw"` for HTML string (default: `"raw"`)
+ - `method` (str, optional): HTTP method for the request (default: `"GET"`)
+        - `country` (str, optional): Two-letter ISO country code for proxy location (default: `""`, no specific country)
+        - `data_format` (str, optional): Additional format transformation (default: `"markdown"`)
+ - `async_request` (bool, optional): Enable asynchronous processing (default: `False`)
+ - `max_workers` (int, optional): Maximum parallel workers for multiple URLs (default: `10`)
+ - `timeout` (int, optional): Request timeout in seconds (default: `30`)
+
+ **Returns:**
+ - Single URL: `Dict[str, Any]` if `response_format="json"`, `str` if `response_format="raw"`
+ - Multiple URLs: `List[Union[Dict[str, Any], str]]` corresponding to each input URL
+
+ **Example Usage:**
+ ```python
+ # Single URL scraping
+ result = client.scrape(
+ url="https://example.com",
+ zone="your_zone_name",
+ response_format="json"
+ )
+
+ # Multiple URLs scraping
+ urls = ["https://site1.com", "https://site2.com"]
+ results = client.scrape(
+ url=urls,
+ zone="your_zone_name",
+ response_format="raw",
+ max_workers=5
+ )
+ ```
+
+ **Raises:**
+ - `ValidationError`: Invalid URL format or empty URL list
+ - `AuthenticationError`: Invalid API token or insufficient permissions
+ - `APIError`: Request failed or server error
+ """
+
+ timeout = timeout or self.default_timeout
+ validate_zone_name(zone)
+ validate_response_format(response_format)
+ validate_http_method(method)
+ validate_country_code(country)
+ validate_timeout(timeout)
+ validate_max_workers(max_workers)
+
+ if isinstance(url, list):
+ validate_url_list(url)
+ effective_max_workers = min(len(url), max_workers or 10)
+
+ results = [None] * len(url)
+
+ with ThreadPoolExecutor(max_workers=effective_max_workers) as executor:
+ future_to_index = {
+ executor.submit(
+ self._perform_single_scrape,
+ single_url, zone, response_format, method, country,
+ data_format, async_request, timeout
+ ): i
+ for i, single_url in enumerate(url)
+ }
+ for future in as_completed(future_to_index):
+ index = future_to_index[future]
+ try:
+ result = future.result()
+ results[index] = result
+ except Exception as e:
+ raise APIError(f"Failed to scrape {url[index]}: {str(e)}")
+
+ return results
+ else:
+ validate_url(url)
+ return self._perform_single_scrape(
+ url, zone, response_format, method, country,
+ data_format, async_request, timeout
+ )
+
+ def _perform_single_scrape(
+ self,
+ url: str,
+ zone: str,
+ response_format: str,
+ method: str,
+ country: str,
+ data_format: str,
+ async_request: bool,
+ timeout: int
+ ) -> Union[Dict[str, Any], str]:
+ """
+ Perform a single scrape operation with comprehensive logging
+ """
+ endpoint = "https://api.brightdata.com/request"
+ start_time = time.time()
+
+ logger.info(f"Starting scrape request for URL: {url[:100]}{'...' if len(url) > 100 else ''}")
+
+ payload = {
+ "zone": zone,
+ "url": url,
+ "format": response_format,
+ "method": method,
+ "data_format": data_format
+ }
+
+ params = {}
+ if async_request:
+ params['async'] = 'true'
+
+ @retry_request(
+ max_retries=self.max_retries,
+ backoff_factor=self.retry_backoff,
+ retry_statuses={429, 500, 502, 503, 504}
+ )
+ def make_request():
+ return self.session.post(
+ endpoint,
+ json=payload,
+ params=params,
+ timeout=timeout
+ )
+
+ try:
+ response = make_request()
+ response_time = (time.time() - start_time) * 1000
+
+ # Log request details
+ log_request(logger, 'POST', endpoint, response.status_code, response_time)
+
+ if response.status_code == 200:
+ logger.info(f"Scrape completed successfully in {response_time:.2f}ms")
+
+ validate_response_size(response.text)
+
+ if response_format == "json":
+ result = safe_json_parse(response.text)
+ logger.debug(f"Processed response with {len(str(result))} characters")
+ return result
+ else:
+ logger.debug(f"Returning raw response with {len(response.text)} characters")
+ return response.text
+
+ elif response.status_code == 400:
+ logger.error(f"Bad Request (400) for URL {url}: {response.text}")
+ raise ValidationError(f"Bad Request (400): {response.text}")
+ elif response.status_code == 401:
+ logger.error(f"Unauthorized (401) for URL {url}: Check API token")
+ raise AuthenticationError(f"Unauthorized (401): Check your API token. {response.text}")
+ elif response.status_code == 403:
+ logger.error(f"Forbidden (403) for URL {url}: Insufficient permissions")
+ raise AuthenticationError(f"Forbidden (403): Insufficient permissions. {response.text}")
+ elif response.status_code == 404:
+ logger.error(f"Not Found (404) for URL {url}: {response.text}")
+ raise APIError(f"Not Found (404): {response.text}")
+ else:
+ logger.error(f"API Error ({response.status_code}) for URL {url}: {response.text}")
+ raise APIError(f"API Error ({response.status_code}): {response.text}",
+ status_code=response.status_code, response_text=response.text)
+
+ except Exception as e:
+ response_time = (time.time() - start_time) * 1000
+ logger.error(f"Request failed after {response_time:.2f}ms for URL {url}: {str(e)}", exc_info=True)
+ raise
\ No newline at end of file
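For readers skimming the threading code in scrape(): the future_to_index mapping is what keeps the returned list aligned with the input URL order even though futures complete out of order. The same pattern in isolation, as a runnable sketch:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def fan_out_in_order(items, worker, max_workers=10):
    """Run worker(item) concurrently while preserving input order (sketch)."""
    results = [None] * len(items)
    with ThreadPoolExecutor(max_workers=max(1, min(len(items), max_workers))) as executor:
        future_to_index = {executor.submit(worker, item): i for i, item in enumerate(items)}
        for future in as_completed(future_to_index):
            results[future_to_index[future]] = future.result()
    return results

# fan_out_in_order([1, 2, 3], lambda x: x * x)  ->  [1, 4, 9]
```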
diff --git a/src/api/search.py b/src/api/search.py
new file mode 100644
index 0000000..24e6365
--- /dev/null
+++ b/src/api/search.py
@@ -0,0 +1,212 @@
+import json
+import time
+from typing import Union, Dict, Any, List
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from urllib.parse import quote_plus
+
+from ..utils import (
+ validate_zone_name, validate_country_code, validate_timeout,
+ validate_max_workers, validate_search_engine, validate_query,
+ validate_response_format, validate_http_method, retry_request,
+ get_logger, log_request, safe_json_parse, validate_response_size
+)
+from ..exceptions import ValidationError, APIError, AuthenticationError
+
+logger = get_logger('api.search')
+
+
+class SearchAPI:
+ """Handles search operations using Bright Data SERP API"""
+
+ def __init__(self, session, default_timeout=30, max_retries=3, retry_backoff=1.5):
+ self.session = session
+ self.default_timeout = default_timeout
+ self.max_retries = max_retries
+ self.retry_backoff = retry_backoff
+
+ def search(
+ self,
+ query: Union[str, List[str]],
+ search_engine: str = "google",
+ zone: str = None,
+ response_format: str = "raw",
+ method: str = "GET",
+ country: str = "",
+ data_format: str = "markdown",
+ async_request: bool = False,
+ max_workers: int = 10,
+ timeout: int = None,
+ parse: bool = False
+ ) -> Union[Dict[str, Any], str, List[Union[Dict[str, Any], str]]]:
+ """
+ ## Search the web using Bright Data SERP API
+
+ Performs web searches through major search engines using Bright Data's proxy network
+ for reliable, bot-detection-free results.
+
+ ### Parameters:
+ - `query` (str | List[str]): Search query string or list of search queries
+ - `search_engine` (str, optional): Search engine to use - `"google"`, `"bing"`, or `"yandex"` (default: `"google"`)
+ - `zone` (str, optional): Your Bright Data zone identifier (default: `None`)
+ - `response_format` (str, optional): Response format - `"json"` for structured data, `"raw"` for HTML string (default: `"raw"`)
+ - `method` (str, optional): HTTP method for the request (default: `"GET"`)
+        - `country` (str, optional): Two-letter ISO country code for proxy location (default: `""`, no specific country)
+ - `data_format` (str, optional): Additional format transformation (default: `"markdown"`)
+ - `async_request` (bool, optional): Enable asynchronous processing (default: `False`)
+ - `max_workers` (int, optional): Maximum parallel workers for multiple queries (default: `10`)
+ - `timeout` (int, optional): Request timeout in seconds (default: `30`)
+ - `parse` (bool, optional): Enable JSON parsing by adding brd_json=1 to URL (default: `False`)
+
+ ### Returns:
+ - Single query: `Dict[str, Any]` if `response_format="json"`, `str` if `response_format="raw"`
+ - Multiple queries: `List[Union[Dict[str, Any], str]]` corresponding to each input query
+
+ ### Example Usage:
+ ```python
+ # Single search query
+ result = client.search(
+ query="best laptops 2024",
+ search_engine="google",
+ response_format="json"
+ )
+
+ # Multiple search queries
+ queries = ["python tutorials", "machine learning courses", "web development"]
+ results = client.search(
+ query=queries,
+ search_engine="bing",
+ zone="your_zone_name",
+ max_workers=3
+ )
+ ```
+
+ ### Supported Search Engines:
+ - `"google"` - Google Search
+ - `"bing"` - Microsoft Bing
+ - `"yandex"` - Yandex Search
+
+ ### Raises:
+ - `ValidationError`: Invalid search engine, empty query, or validation errors
+ - `AuthenticationError`: Invalid API token or insufficient permissions
+ - `APIError`: Request failed or server error
+ """
+
+ timeout = timeout or self.default_timeout
+ validate_zone_name(zone)
+ validate_search_engine(search_engine)
+ validate_query(query)
+ validate_response_format(response_format)
+ validate_http_method(method)
+ validate_country_code(country)
+ validate_timeout(timeout)
+ validate_max_workers(max_workers)
+
+ base_url_map = {
+ "google": "https://www.google.com/search?q=",
+ "bing": "https://www.bing.com/search?q=",
+ "yandex": "https://yandex.com/search/?text="
+ }
+
+ base_url = base_url_map[search_engine.lower()]
+
+ if isinstance(query, list):
+ effective_max_workers = min(len(query), max_workers or 10)
+ results = [None] * len(query)
+
+ with ThreadPoolExecutor(max_workers=effective_max_workers) as executor:
+ future_to_index = {
+ executor.submit(
+ self._perform_single_search,
+ single_query, zone, response_format, method, country,
+ data_format, async_request, base_url, timeout, parse
+ ): i
+ for i, single_query in enumerate(query)
+ }
+
+ for future in as_completed(future_to_index):
+ index = future_to_index[future]
+ try:
+ result = future.result()
+ results[index] = result
+ except Exception as e:
+ raise APIError(f"Failed to search '{query[index]}': {str(e)}")
+
+ return results
+ else:
+ return self._perform_single_search(
+ query, zone, response_format, method, country,
+ data_format, async_request, base_url, timeout, parse
+ )
+
+ def _perform_single_search(
+ self,
+ query: str,
+ zone: str,
+ response_format: str,
+ method: str,
+ country: str,
+ data_format: str,
+ async_request: bool,
+ base_url: str,
+ timeout: int,
+ parse: bool
+ ) -> Union[Dict[str, Any], str]:
+ """
+ Perform a single search operation
+ """
+ encoded_query = quote_plus(query)
+ url = f"{base_url}{encoded_query}"
+
+ if parse:
+ url += "&brd_json=1"
+
+ endpoint = "https://api.brightdata.com/request"
+
+ payload = {
+ "zone": zone,
+ "url": url,
+ "format": response_format,
+ "method": method,
+ "data_format": data_format
+ }
+
+ params = {}
+ if async_request:
+ params['async'] = 'true'
+
+ @retry_request(
+ max_retries=self.max_retries,
+ backoff_factor=self.retry_backoff,
+ retry_statuses={429, 500, 502, 503, 504}
+ )
+ def make_request():
+ return self.session.post(
+ endpoint,
+ json=payload,
+ params=params,
+ timeout=timeout
+ )
+
+ response = make_request()
+
+ if response.status_code == 200:
+ if response_format == "json":
+ try:
+ return response.json()
+ except json.JSONDecodeError as e:
+ logger.warning(f"Failed to parse JSON response: {e}")
+ return response.text
+ else:
+ return response.text
+
+ elif response.status_code == 400:
+ raise ValidationError(f"Bad Request (400): {response.text}")
+ elif response.status_code == 401:
+ raise AuthenticationError(f"Unauthorized (401): Check your API token. {response.text}")
+ elif response.status_code == 403:
+ raise AuthenticationError(f"Forbidden (403): Insufficient permissions. {response.text}")
+ elif response.status_code == 404:
+ raise APIError(f"Not Found (404): {response.text}")
+ else:
+ raise APIError(f"API Error ({response.status_code}): {response.text}",
+ status_code=response.status_code, response_text=response.text)
\ No newline at end of file
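The SERP request is just a Web Unlocker call whose target URL is a search-engine query string: _perform_single_search builds that URL with quote_plus and, when parse=True, appends brd_json=1 so Bright Data returns parsed JSON. The URL construction on its own, as a sketch:

```python
from urllib.parse import quote_plus

BASE_URL_MAP = {
    "google": "https://www.google.com/search?q=",
    "bing": "https://www.bing.com/search?q=",
    "yandex": "https://yandex.com/search/?text=",
}

def build_serp_url(query: str, search_engine: str = "google", parse: bool = False) -> str:
    """Build the search URL sent to api.brightdata.com/request (sketch)."""
    url = f"{BASE_URL_MAP[search_engine.lower()]}{quote_plus(query)}"
    if parse:
        url += "&brd_json=1"
    return url

# build_serp_url("best laptops 2024", parse=True)
# -> "https://www.google.com/search?q=best+laptops+2024&brd_json=1"
```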
From b5a6625d449de9f874cdc3e50750a9131a71079b Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:16:03 +0200
Subject: [PATCH 06/70] Delete src/api/tst
---
src/api/tst | 1 -
1 file changed, 1 deletion(-)
delete mode 100644 src/api/tst
diff --git a/src/api/tst b/src/api/tst
deleted file mode 100644
index 8b13789..0000000
--- a/src/api/tst
+++ /dev/null
@@ -1 +0,0 @@
-
From bf6605de307c39c64ca0d968d692399353c3cbda Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:16:28 +0200
Subject: [PATCH 07/70] Create tst
---
src/.github/tst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 src/.github/tst
diff --git a/src/.github/tst b/src/.github/tst
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/.github/tst
@@ -0,0 +1 @@
+
From 9d17b0dfaae400ea175f80a696f84b6554746702 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:18:00 +0200
Subject: [PATCH 08/70] Create tst
---
src/.github/workflows/tst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 src/.github/workflows/tst
diff --git a/src/.github/workflows/tst b/src/.github/workflows/tst
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/.github/workflows/tst
@@ -0,0 +1 @@
+
From cbb11e4067bbc735b33f9a0693d6c27fd90c91d9 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:18:21 +0200
Subject: [PATCH 09/70] Add files via upload
---
src/.github/workflows/publish.yml | 65 +++++++++++++++
src/.github/workflows/test.yml | 129 ++++++++++++++++++++++++++++++
2 files changed, 194 insertions(+)
create mode 100644 src/.github/workflows/publish.yml
create mode 100644 src/.github/workflows/test.yml
diff --git a/src/.github/workflows/publish.yml b/src/.github/workflows/publish.yml
new file mode 100644
index 0000000..7c2ec42
--- /dev/null
+++ b/src/.github/workflows/publish.yml
@@ -0,0 +1,65 @@
+name: Build and Publish
+
+on:
+ push:
+ tags:
+ - 'v*'
+ release:
+ types: [published]
+ workflow_dispatch:
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.8'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install build twine
+ pip install -r requirements.txt
+
+ - name: Build package
+ run: python -m build
+
+ - name: Upload build artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: dist-files
+ path: dist/
+
+ - name: Publish to PyPI
+ if: github.event_name == 'release'
+ env:
+ TWINE_USERNAME: __token__
+ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+ run: |
+ twine upload dist/*
+
+ test-install:
+ runs-on: ubuntu-latest
+ needs: build
+
+ steps:
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.8'
+
+ - name: Download build artifacts
+ uses: actions/download-artifact@v4
+ with:
+ name: dist-files
+ path: dist/
+
+ - name: Test wheel installation
+ run: |
+ pip install dist/*.whl
+ python -c "import brightdata; print('✅ Package imported successfully')"
\ No newline at end of file
diff --git a/src/.github/workflows/test.yml b/src/.github/workflows/test.yml
new file mode 100644
index 0000000..69a0a2d
--- /dev/null
+++ b/src/.github/workflows/test.yml
@@ -0,0 +1,129 @@
+name: Tests
+
+on:
+ push:
+ branches: [ main, develop ]
+ pull_request:
+ branches: [ main ]
+ schedule:
+ - cron: '0 2 * * *'
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt
+ pip install pytest pytest-cov
+
+ - name: Test package import
+ run: |
+ python -c "import brightdata; print('Import successful')"
+
+ - name: Run tests
+ run: |
+ python -m pytest tests/ -v --cov=brightdata --cov-report=xml
+
+ - name: Upload coverage to Codecov
+ if: matrix.python-version == '3.8'
+ uses: codecov/codecov-action@v3
+ with:
+ file: ./coverage.xml
+
+ test-pypi-package:
+ runs-on: ubuntu-latest
+ if: github.event_name == 'schedule'
+ strategy:
+ matrix:
+ python-version: ['3.8', '3.11']
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install PyPI package
+ run: |
+ python -m pip install --upgrade pip
+ pip install brightdata-sdk
+ pip install pytest
+
+ - name: Test PyPI package import
+ run: |
+ python -c "import brightdata; print('PyPI package import successful')"
+ python -c "from brightdata import bdclient; print('bdclient import successful')"
+
+ - name: Test PyPI package basic functionality
+ run: |
+ python -c "
+ import sys
+ from brightdata import bdclient, __version__
+ print(f'PyPI package version: {__version__}')
+
+ # Test that validation works (accept any validation error as success)
+ try:
+ client = bdclient(api_token='test_token_too_short')
+ print('WARNING: No validation error - this might indicate an issue')
+ except Exception as e:
+ print(f'Validation error caught: {str(e)[:100]}...')
+ print('PyPI package validation working correctly')
+
+ # Test basic client creation with disabled auto-zone creation
+ try:
+ client = bdclient(api_token='test_token_123456789', auto_create_zones=False)
+ print('Client creation successful')
+
+ # Test that basic methods exist
+ methods = ['scrape', 'search', 'download_content']
+ for method in methods:
+ if hasattr(client, method):
+ print(f'Method {method} exists')
+ else:
+ print(f'Method {method} missing (might be version difference)')
+
+ except Exception as e:
+ print(f'ERROR: Client creation failed: {e}')
+ sys.exit(1)
+
+ print('PyPI package basic functionality test completed')
+ "
+
+ - name: Test PyPI package compatibility
+ run: |
+ python -c "
+ print('Running PyPI package compatibility tests...')
+
+ # Test import compatibility
+ try:
+ from brightdata import bdclient, __version__
+ from brightdata.exceptions import ValidationError
+ print('Core imports working')
+ except ImportError as e:
+ print(f'ERROR: Import failed: {e}')
+ exit(1)
+
+ # Test that client requires token
+ try:
+ client = bdclient() # Should fail without token
+ print('WARNING: Client created without token - unexpected')
+ except Exception:
+ print('Token requirement validated')
+
+ print('PyPI package compatibility tests completed')
+ "
\ No newline at end of file
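For quick iteration outside CI, the "Run tests" step above can be reproduced locally. A minimal sketch, assuming pytest and pytest-cov are installed and the repository's tests/ directory is present:

```python
# Local equivalent of the CI test step; assumes pytest and pytest-cov are
# installed and the package is importable as `brightdata`.
import pytest

exit_code = pytest.main(["tests/", "-v", "--cov=brightdata", "--cov-report=xml"])
raise SystemExit(exit_code)
```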
From c590d4942cf1b3c71574e28742215c3ed5355144 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:21:30 +0200
Subject: [PATCH 10/70] Create tst
---
examples/tst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 examples/tst
diff --git a/examples/tst b/examples/tst
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/examples/tst
@@ -0,0 +1 @@
+
From baea61662b70cec4e73ec89e33e5367c431a141e Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:21:53 +0200
Subject: [PATCH 11/70] Add files via upload
---
examples/browser_connection_example.py | 33 +++++++++++++++++++++
examples/crawl_example.py | 11 +++++++
examples/download_snapshot_example.py | 9 ++++++
examples/extract_example.py | 30 +++++++++++++++++++
examples/scrape_chatgpt_example.py | 15 ++++++++++
examples/scrape_example.py | 16 +++++++++++
examples/scrape_linkedin_example.py | 32 +++++++++++++++++++++
examples/search_example.py | 16 +++++++++++
examples/search_linkedin_example.py | 40 ++++++++++++++++++++++++++
9 files changed, 202 insertions(+)
create mode 100644 examples/browser_connection_example.py
create mode 100644 examples/crawl_example.py
create mode 100644 examples/download_snapshot_example.py
create mode 100644 examples/extract_example.py
create mode 100644 examples/scrape_chatgpt_example.py
create mode 100644 examples/scrape_example.py
create mode 100644 examples/scrape_linkedin_example.py
create mode 100644 examples/search_example.py
create mode 100644 examples/search_linkedin_example.py
diff --git a/examples/browser_connection_example.py b/examples/browser_connection_example.py
new file mode 100644
index 0000000..a6ebf98
--- /dev/null
+++ b/examples/browser_connection_example.py
@@ -0,0 +1,33 @@
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from brightdata import bdclient
+from playwright.sync_api import sync_playwright, Playwright
+
+client = bdclient(
+ api_token="your-api-key",
+ browser_username="copy-from-zone-configuration",
+ browser_password="copy-from-zone-configuration",
+ browser_zone="your-custom-browser-zone"
+) # Hover over bdclient in your IDE to see all browser parameters (they can also be loaded from a .env file)
+
+def scrape(playwright: Playwright, url="https://example.com"):
+ browser = playwright.chromium.connect_over_cdp(client.connect_browser()) # Connect to the browser using Bright Data's endpoint
+ try:
+ print(f'Connected! Navigating to {url}...')
+ page = browser.new_page()
+ page.goto(url, timeout=2*60_000)
+ print('Navigated! Scraping page content...')
+ data = page.content()
+ print(f'Scraped! Data: {data}')
+ finally:
+ browser.close()
+
+
+def main():
+ with sync_playwright() as playwright:
+ scrape(playwright)
+
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff --git a/examples/crawl_example.py b/examples/crawl_example.py
new file mode 100644
index 0000000..65b2695
--- /dev/null
+++ b/examples/crawl_example.py
@@ -0,0 +1,11 @@
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from brightdata import bdclient
+client = bdclient(api_token="your-api-key") # can also be taken from .env file
+
+result = client.crawl(
+ url="https://example.com/", depth=1, filter="/product/",
+ exclude_filter="/ads/", custom_output_fields=["markdown", "url", "page_title"]
+)
+print(f"Snapshot ID: {result['snapshot_id']}")
\ No newline at end of file
diff --git a/examples/download_snapshot_example.py b/examples/download_snapshot_example.py
new file mode 100644
index 0000000..ea7f8f0
--- /dev/null
+++ b/examples/download_snapshot_example.py
@@ -0,0 +1,9 @@
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from brightdata import bdclient
+
+client = bdclient(api_token="your-api-key") # can also be taken from .env file
+
+snapshot_id = "" # replace with your snapshot ID
+
+client.download_snapshot(snapshot_id)
\ No newline at end of file
diff --git a/examples/extract_example.py b/examples/extract_example.py
new file mode 100644
index 0000000..0723350
--- /dev/null
+++ b/examples/extract_example.py
@@ -0,0 +1,30 @@
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from brightdata import bdclient
+
+client = bdclient()
+
+# Basic extraction
+result = client.extract("Extract news headlines from CNN.com")
+print(result)
+
+# Using URL parameter with structured output
+schema = {
+ "type": "object",
+ "properties": {
+ "headlines": {
+ "type": "array",
+ "items": {"type": "string"}
+ }
+ },
+ "required": ["headlines"],
+ "additionalProperties": False
+}
+
+result = client.extract(
+ query="Extract main headlines",
+ url="https://cnn.com",
+ output_scheme=schema
+)
+print(result)
\ No newline at end of file
diff --git a/examples/scrape_chatgpt_example.py b/examples/scrape_chatgpt_example.py
new file mode 100644
index 0000000..b695734
--- /dev/null
+++ b/examples/scrape_chatgpt_example.py
@@ -0,0 +1,15 @@
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from brightdata import bdclient
+
+client = bdclient("your-api-key") # can also be taken from .env file
+
+result = client.search_chatGPT(
+ prompt="what day is it today?"
+ # prompt=["What are the top 3 programming languages in 2024?", "Best hotels in New York", "Explain quantum computing"],
+ # additional_prompt=["Can you explain why?", "Are you sure?", ""]
+)
+
+client.download_content(result)
+# If a timeout occurs, the snapshot is still created and can be downloaded later by its snapshot ID (see the download_snapshot example)
diff --git a/examples/scrape_example.py b/examples/scrape_example.py
new file mode 100644
index 0000000..bf6b1a8
--- /dev/null
+++ b/examples/scrape_example.py
@@ -0,0 +1,16 @@
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from brightdata import bdclient
+
+client = bdclient(api_token="your-API-key") # Can also be taken from .env file
+
+URL = (["https://www.amazon.com/dp/B079QHML21",
+ "https://www.ebay.com/itm/365771796300",
+ "https://www.walmart.com/ip/Apple-MacBook-Air-13-3-inch-Laptop-Space-Gray-M1-Chip-8GB-RAM-256GB-storage/609040889"])
+
+results = client.scrape(url=URL, max_workers=5)
+
+result = client.parse_content(results, extract_text=True) # Choose what to extract
+
+print(result)
\ No newline at end of file
diff --git a/examples/scrape_linkedin_example.py b/examples/scrape_linkedin_example.py
new file mode 100644
index 0000000..8483f5a
--- /dev/null
+++ b/examples/scrape_linkedin_example.py
@@ -0,0 +1,32 @@
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from brightdata import bdclient
+
+client = bdclient() # can also be taken from .env file
+
+# LinkedIn Profile URLs
+profile_url = "https://www.linkedin.com/in/elad-moshe-05a90413/"
+
+# LinkedIn Company URLs
+company_urls = [
+ "https://il.linkedin.com/company/ibm",
+ "https://www.linkedin.com/company/bright-data",
+ "https://www.linkedin.com/company/stalkit"
+]
+
+# LinkedIn Job URLs
+job_urls = [
+ "https://www.linkedin.com/jobs/view/remote-typist-%E2%80%93-data-entry-specialist-work-from-home-at-cwa-group-4181034038?trk=public_jobs_topcard-title",
+ "https://www.linkedin.com/jobs/view/arrt-r-at-shared-imaging-llc-4180989163?trk=public_jobs_topcard-title"
+]
+
+# LinkedIn Post URLs
+post_urls = [
+ "https://www.linkedin.com/posts/orlenchner_scrapecon-activity-7180537307521769472-oSYN?trk=public_profile",
+ "https://www.linkedin.com/pulse/getting-value-out-sunburst-guillaume-de-b%C3%A9naz%C3%A9?trk=public_profile_article_view"
+]
+
+results = client.scrape_linkedin.posts(post_urls)  # can also be run asynchronously
+
+client.download_content(results)
\ No newline at end of file
diff --git a/examples/search_example.py b/examples/search_example.py
new file mode 100644
index 0000000..3b9e3eb
--- /dev/null
+++ b/examples/search_example.py
@@ -0,0 +1,16 @@
+import sys, os
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from brightdata import bdclient
+
+client = bdclient(api_token="your-api-token", auto_create_zones=False, serp_zone="your-custom-serp-zone") # zone and API token can also be defined in .env file
+
+query = ["iphone 16", "coffee maker", "portable projector", "sony headphones",
+ "laptop stand", "power bank", "running shoes", "android tablet",
+ "hiking backpack", "dash cam"]
+
+results = client.search(query, max_workers=10,
+                        response_format="json", parse=True)
+
+client.download_content(results, parse=True) # parse=True to save as JSON, otherwise saves as raw HTML
\ No newline at end of file
diff --git a/examples/search_linkedin_example.py b/examples/search_linkedin_example.py
new file mode 100644
index 0000000..be5f7df
--- /dev/null
+++ b/examples/search_linkedin_example.py
@@ -0,0 +1,40 @@
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from brightdata import bdclient
+
+client = bdclient(api_token="your-api-key") # can also be taken from .env file
+
+# Search LinkedIn profiles by name
+first_names = ["James", "Idan"]
+last_names = ["Smith", "Vilenski"]
+result = client.search_linkedin.profiles(first_names, last_names)
+
+# Search jobs by URL
+job_urls = [
+ "https://www.linkedin.com/jobs/search?keywords=Software&location=Tel%20Aviv-Yafo",
+ "https://www.linkedin.com/jobs/reddit-inc.-jobs-worldwide?f_C=150573"
+]
+result = client.search_linkedin.jobs(url=job_urls)
+
+# Search jobs by keyword and location
+result = client.search_linkedin.jobs(
+ location="Paris",
+ keyword="product manager",
+ country="FR",
+ time_range="Past month",
+ job_type="Full-time"
+)
+
+# Search posts by profile URL with date range
+result = client.search_linkedin.posts(
+ profile_url="https://www.linkedin.com/in/bettywliu",
+ start_date="2018-04-25T00:00:00.000Z",
+ end_date="2021-05-25T00:00:00.000Z"
+)
+# Search posts by company URL
+result = client.search_linkedin.posts(
+ company_url="https://www.linkedin.com/company/bright-data"
+)
+
+# Each call returns a snapshot ID that can be used later to download the content with the download_snapshot() function
\ No newline at end of file
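The LinkedIn search calls above run asynchronously and return snapshot metadata rather than the records themselves. A minimal follow-up sketch, assuming each call returns a dict exposing the snapshot ID under a 'snapshot_id' key (as the crawl example does):

```python
# Hypothetical follow-up: fetch the results of an asynchronous LinkedIn search.
# Assumes `result` holds the dict returned by a search_linkedin call and that it
# contains a 'snapshot_id' key, as in the crawl example.
snapshot_id = result["snapshot_id"]
client.download_snapshot(snapshot_id)
```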
From 468a96f19d94be277cf800ce081a35fbfd7b047d Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:22:22 +0200
Subject: [PATCH 12/70] Delete src/.github/tst
---
src/.github/tst | 1 -
1 file changed, 1 deletion(-)
delete mode 100644 src/.github/tst
diff --git a/src/.github/tst b/src/.github/tst
deleted file mode 100644
index 8b13789..0000000
--- a/src/.github/tst
+++ /dev/null
@@ -1 +0,0 @@
-
From f469f81e3f83fcfa72a7b42276ba64ebd7435440 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:24:44 +0200
Subject: [PATCH 13/70] Create tst
---
tests/tst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 tests/tst
diff --git a/tests/tst b/tests/tst
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/tst
@@ -0,0 +1 @@
+
From a6a8e5ae925c0bd6c7e80566add2c768862ff069 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:25:03 +0200
Subject: [PATCH 14/70] Add files via upload
---
tests/__init__.py | 0
tests/test_client.py | 121 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 121 insertions(+)
create mode 100644 tests/__init__.py
create mode 100644 tests/test_client.py
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_client.py b/tests/test_client.py
new file mode 100644
index 0000000..51b1315
--- /dev/null
+++ b/tests/test_client.py
@@ -0,0 +1,121 @@
+"""
+Comprehensive tests for the Bright Data SDK client.
+
+This test suite covers:
+- Client initialization with API tokens (from parameter and environment)
+- API token validation and error handling for missing tokens
+- Zone configuration (default and custom zone names)
+- URL validation in scrape method (scheme requirement)
+- Search query validation (empty query handling)
+- Search engine validation (unsupported engine handling)
+
+All tests are designed to run without requiring real API tokens by:
+- Using sufficiently long test tokens to pass validation
+- Mocking zone management to avoid network calls
+- Testing validation logic and error messages
+"""
+
+import pytest
+import os
+from unittest.mock import patch
+
+from brightdata import bdclient
+from brightdata.exceptions import ValidationError
+
+
+class TestBdClient:
+ """Test cases for the main bdclient class"""
+
+ @patch('brightdata.utils.zone_manager.ZoneManager.ensure_required_zones')
+ def test_client_init_with_token(self, mock_zones):
+ """Test client initialization with API token"""
+ with patch.dict(os.environ, {}, clear=True):
+ client = bdclient(api_token="valid_test_token_12345678", auto_create_zones=False)
+ assert client.api_token == "valid_test_token_12345678"
+
+ @patch('brightdata.utils.zone_manager.ZoneManager.ensure_required_zones')
+ def test_client_init_from_env(self, mock_zones):
+ """Test client initialization from environment variable"""
+ with patch.dict(os.environ, {"BRIGHTDATA_API_TOKEN": "valid_env_token_12345678"}):
+ client = bdclient(auto_create_zones=False)
+ assert client.api_token == "valid_env_token_12345678"
+
+ def test_client_init_no_token_raises_error(self):
+ """Test that missing API token raises ValidationError"""
+ with patch.dict(os.environ, {}, clear=True):
+ with patch('dotenv.load_dotenv'):
+ with pytest.raises(ValidationError, match="API token is required"):
+ bdclient()
+
+ @patch('brightdata.utils.zone_manager.ZoneManager.ensure_required_zones')
+ def test_client_zone_defaults(self, mock_zones):
+ """Test default zone configurations"""
+ with patch.dict(os.environ, {}, clear=True):
+ client = bdclient(api_token="valid_test_token_12345678", auto_create_zones=False)
+ assert client.web_unlocker_zone == "sdk_unlocker"
+ assert client.serp_zone == "sdk_serp"
+
+ @patch('brightdata.utils.zone_manager.ZoneManager.ensure_required_zones')
+ def test_client_custom_zones(self, mock_zones):
+ """Test custom zone configuration"""
+ with patch.dict(os.environ, {}, clear=True):
+ client = bdclient(
+ api_token="valid_test_token_12345678",
+ web_unlocker_zone="custom_unlocker",
+ serp_zone="custom_serp",
+ auto_create_zones=False
+ )
+ assert client.web_unlocker_zone == "custom_unlocker"
+ assert client.serp_zone == "custom_serp"
+
+
+class TestClientMethods:
+ """Test cases for client methods with mocked responses"""
+
+ @pytest.fixture
+ @patch('brightdata.utils.zone_manager.ZoneManager.ensure_required_zones')
+ def client(self, mock_zones):
+ """Create a test client with mocked validation"""
+ with patch.dict(os.environ, {}, clear=True):
+ client = bdclient(api_token="valid_test_token_12345678", auto_create_zones=False)
+ return client
+
+ def test_scrape_single_url_validation(self, client):
+ """Test URL validation in scrape method"""
+ with pytest.raises(ValidationError, match="URL must include a scheme"):
+ client.scrape("not_a_url")
+
+ def test_search_empty_query_validation(self, client):
+ """Test query validation in search method"""
+ with pytest.raises(ValidationError, match="cannot be empty"):
+ client.search("")
+
+ def test_search_unsupported_engine(self, client):
+ """Test unsupported search engine validation"""
+ with pytest.raises(ValidationError, match="Invalid search engine"):
+ client.search("test query", search_engine="invalid_engine")
+
+ def test_search_with_parse_parameter(self, client, monkeypatch):
+ """Test search with parse parameter adds brd_json=1 to URL"""
+ # Mock the session.post method to capture the request
+ captured_request = {}
+
+ def mock_post(*args, **kwargs):
+ captured_request.update(kwargs)
+ from unittest.mock import Mock
+ response = Mock()
+ response.status_code = 200
+ response.text = "mocked html response"
+ return response
+
+ monkeypatch.setattr(client.search_api.session, 'post', mock_post)
+
+ result = client.search("test query", parse=True)
+
+ # Verify the request was made with correct URL containing &brd_json=1
+ request_data = captured_request.get('json', {})
+ assert "&brd_json=1" in request_data["url"]
+
+
+if __name__ == "__main__":
+ pytest.main([__file__])
\ No newline at end of file
From 35e632a0593432597280737cfb03584a7b71ff3a Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:25:22 +0200
Subject: [PATCH 15/70] Delete tests/tst
---
tests/tst | 1 -
1 file changed, 1 deletion(-)
delete mode 100644 tests/tst
diff --git a/tests/tst b/tests/tst
deleted file mode 100644
index 8b13789..0000000
--- a/tests/tst
+++ /dev/null
@@ -1 +0,0 @@
-
From b54f9f067e3b7a04561e91ec61a72c0443ee9742 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:26:20 +0200
Subject: [PATCH 16/70] Create tst
---
.github/workflows/tst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 .github/workflows/tst
diff --git a/.github/workflows/tst b/.github/workflows/tst
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/.github/workflows/tst
@@ -0,0 +1 @@
+
From 6d625f4b0017177206308683c0ac34106fe20a18 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:26:40 +0200
Subject: [PATCH 17/70] Add files via upload
---
.github/workflows/publish.yml | 65 +++++++++++++++++
.github/workflows/test.yml | 129 ++++++++++++++++++++++++++++++++++
2 files changed, 194 insertions(+)
create mode 100644 .github/workflows/publish.yml
create mode 100644 .github/workflows/test.yml
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..7c2ec42
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,65 @@
+name: Build and Publish
+
+on:
+ push:
+ tags:
+ - 'v*'
+ release:
+ types: [published]
+ workflow_dispatch:
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.8'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install build twine
+ pip install -r requirements.txt
+
+ - name: Build package
+ run: python -m build
+
+ - name: Upload build artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: dist-files
+ path: dist/
+
+ - name: Publish to PyPI
+ if: github.event_name == 'release'
+ env:
+ TWINE_USERNAME: __token__
+ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+ run: |
+ twine upload dist/*
+
+ test-install:
+ runs-on: ubuntu-latest
+ needs: build
+
+ steps:
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: '3.8'
+
+ - name: Download build artifacts
+ uses: actions/download-artifact@v4
+ with:
+ name: dist-files
+ path: dist/
+
+ - name: Test wheel installation
+ run: |
+ pip install dist/*.whl
+ python -c "import brightdata; print('✅ Package imported successfully')"
\ No newline at end of file
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..69a0a2d
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,129 @@
+name: Tests
+
+on:
+ push:
+ branches: [ main, develop ]
+ pull_request:
+ branches: [ main ]
+ schedule:
+ - cron: '0 2 * * *'
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt
+ pip install pytest pytest-cov
+
+ - name: Test package import
+ run: |
+ python -c "import brightdata; print('Import successful')"
+
+ - name: Run tests
+ run: |
+ python -m pytest tests/ -v --cov=brightdata --cov-report=xml
+
+ - name: Upload coverage to Codecov
+ if: matrix.python-version == '3.8'
+ uses: codecov/codecov-action@v3
+ with:
+ file: ./coverage.xml
+
+ test-pypi-package:
+ runs-on: ubuntu-latest
+ if: github.event_name == 'schedule'
+ strategy:
+ matrix:
+ python-version: ['3.8', '3.11']
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install PyPI package
+ run: |
+ python -m pip install --upgrade pip
+ pip install brightdata-sdk
+ pip install pytest
+
+ - name: Test PyPI package import
+ run: |
+ python -c "import brightdata; print('PyPI package import successful')"
+ python -c "from brightdata import bdclient; print('bdclient import successful')"
+
+ - name: Test PyPI package basic functionality
+ run: |
+ python -c "
+ import sys
+ from brightdata import bdclient, __version__
+ print(f'PyPI package version: {__version__}')
+
+ # Test that validation works (accept any validation error as success)
+ try:
+ client = bdclient(api_token='test_token_too_short')
+ print('WARNING: No validation error - this might indicate an issue')
+ except Exception as e:
+ print(f'Validation error caught: {str(e)[:100]}...')
+ print('PyPI package validation working correctly')
+
+ # Test basic client creation with disabled auto-zone creation
+ try:
+ client = bdclient(api_token='test_token_123456789', auto_create_zones=False)
+ print('Client creation successful')
+
+ # Test that basic methods exist
+ methods = ['scrape', 'search', 'download_content']
+ for method in methods:
+ if hasattr(client, method):
+ print(f'Method {method} exists')
+ else:
+ print(f'Method {method} missing (might be version difference)')
+
+ except Exception as e:
+ print(f'ERROR: Client creation failed: {e}')
+ sys.exit(1)
+
+ print('PyPI package basic functionality test completed')
+ "
+
+ - name: Test PyPI package compatibility
+ run: |
+ python -c "
+ print('Running PyPI package compatibility tests...')
+
+ # Test import compatibility
+ try:
+ from brightdata import bdclient, __version__
+ from brightdata.exceptions import ValidationError
+ print('Core imports working')
+ except ImportError as e:
+ print(f'ERROR: Import failed: {e}')
+ exit(1)
+
+ # Test that client requires token
+ try:
+ client = bdclient() # Should fail without token
+ print('WARNING: Client created without token - unexpected')
+ except Exception:
+ print('Token requirement validated')
+
+ print('PyPI package compatibility tests completed')
+ "
\ No newline at end of file
From 3407ff9a10868472e342d88722e52730703d004b Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:26:58 +0200
Subject: [PATCH 18/70] Delete src/.github/workflows directory
---
src/.github/workflows/publish.yml | 65 ---------------
src/.github/workflows/test.yml | 129 ------------------------------
src/.github/workflows/tst | 1 -
3 files changed, 195 deletions(-)
delete mode 100644 src/.github/workflows/publish.yml
delete mode 100644 src/.github/workflows/test.yml
delete mode 100644 src/.github/workflows/tst
diff --git a/src/.github/workflows/publish.yml b/src/.github/workflows/publish.yml
deleted file mode 100644
index 7c2ec42..0000000
--- a/src/.github/workflows/publish.yml
+++ /dev/null
@@ -1,65 +0,0 @@
-name: Build and Publish
-
-on:
- push:
- tags:
- - 'v*'
- release:
- types: [published]
- workflow_dispatch:
-
-jobs:
- build:
- runs-on: ubuntu-latest
-
- steps:
- - uses: actions/checkout@v4
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: '3.8'
-
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install build twine
- pip install -r requirements.txt
-
- - name: Build package
- run: python -m build
-
- - name: Upload build artifacts
- uses: actions/upload-artifact@v4
- with:
- name: dist-files
- path: dist/
-
- - name: Publish to PyPI
- if: github.event_name == 'release'
- env:
- TWINE_USERNAME: __token__
- TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
- run: |
- twine upload dist/*
-
- test-install:
- runs-on: ubuntu-latest
- needs: build
-
- steps:
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: '3.8'
-
- - name: Download build artifacts
- uses: actions/download-artifact@v4
- with:
- name: dist-files
- path: dist/
-
- - name: Test wheel installation
- run: |
- pip install dist/*.whl
- python -c "import brightdata; print('✅ Package imported successfully')"
\ No newline at end of file
diff --git a/src/.github/workflows/test.yml b/src/.github/workflows/test.yml
deleted file mode 100644
index 69a0a2d..0000000
--- a/src/.github/workflows/test.yml
+++ /dev/null
@@ -1,129 +0,0 @@
-name: Tests
-
-on:
- push:
- branches: [ main, develop ]
- pull_request:
- branches: [ main ]
- schedule:
- - cron: '0 2 * * *'
-
-jobs:
- test:
- runs-on: ubuntu-latest
- strategy:
- matrix:
- python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
-
- steps:
- - uses: actions/checkout@v4
-
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
-
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install -r requirements.txt
- pip install pytest pytest-cov
-
- - name: Test package import
- run: |
- python -c "import brightdata; print('Import successful')"
-
- - name: Run tests
- run: |
- python -m pytest tests/ -v --cov=brightdata --cov-report=xml
-
- - name: Upload coverage to Codecov
- if: matrix.python-version == '3.8'
- uses: codecov/codecov-action@v3
- with:
- file: ./coverage.xml
-
- test-pypi-package:
- runs-on: ubuntu-latest
- if: github.event_name == 'schedule'
- strategy:
- matrix:
- python-version: ['3.8', '3.11']
-
- steps:
- - uses: actions/checkout@v4
-
- - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
-
- - name: Install PyPI package
- run: |
- python -m pip install --upgrade pip
- pip install brightdata-sdk
- pip install pytest
-
- - name: Test PyPI package import
- run: |
- python -c "import brightdata; print('PyPI package import successful')"
- python -c "from brightdata import bdclient; print('bdclient import successful')"
-
- - name: Test PyPI package basic functionality
- run: |
- python -c "
- import sys
- from brightdata import bdclient, __version__
- print(f'PyPI package version: {__version__}')
-
- # Test that validation works (accept any validation error as success)
- try:
- client = bdclient(api_token='test_token_too_short')
- print('WARNING: No validation error - this might indicate an issue')
- except Exception as e:
- print(f'Validation error caught: {str(e)[:100]}...')
- print('PyPI package validation working correctly')
-
- # Test basic client creation with disabled auto-zone creation
- try:
- client = bdclient(api_token='test_token_123456789', auto_create_zones=False)
- print('Client creation successful')
-
- # Test that basic methods exist
- methods = ['scrape', 'search', 'download_content']
- for method in methods:
- if hasattr(client, method):
- print(f'Method {method} exists')
- else:
- print(f'Method {method} missing (might be version difference)')
-
- except Exception as e:
- print(f'ERROR: Client creation failed: {e}')
- sys.exit(1)
-
- print('PyPI package basic functionality test completed')
- "
-
- - name: Test PyPI package compatibility
- run: |
- python -c "
- print('Running PyPI package compatibility tests...')
-
- # Test import compatibility
- try:
- from brightdata import bdclient, __version__
- from brightdata.exceptions import ValidationError
- print('Core imports working')
- except ImportError as e:
- print(f'ERROR: Import failed: {e}')
- exit(1)
-
- # Test that client requires token
- try:
- client = bdclient() # Should fail without token
- print('WARNING: Client created without token - unexpected')
- except Exception:
- print('Token requirement validated')
-
- print('PyPI package compatibility tests completed')
- "
\ No newline at end of file
diff --git a/src/.github/workflows/tst b/src/.github/workflows/tst
deleted file mode 100644
index 8b13789..0000000
--- a/src/.github/workflows/tst
+++ /dev/null
@@ -1 +0,0 @@
-
From e7a5e316efa02f6b447b30009f070bc5d3c8aac7 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:27:17 +0200
Subject: [PATCH 19/70] Delete .github/workflows/tst
---
.github/workflows/tst | 1 -
1 file changed, 1 deletion(-)
delete mode 100644 .github/workflows/tst
diff --git a/.github/workflows/tst b/.github/workflows/tst
deleted file mode 100644
index 8b13789..0000000
--- a/.github/workflows/tst
+++ /dev/null
@@ -1 +0,0 @@
-
From 35f434b0a5ad6400ae37e34da642b78cf6b49a16 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:35:08 +0200
Subject: [PATCH 20/70] Create tst
---
src/utils/tst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 src/utils/tst
diff --git a/src/utils/tst b/src/utils/tst
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/utils/tst
@@ -0,0 +1 @@
+
From 916467aa1209d846d639c19d88ca5803cb29e225 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:35:31 +0200
Subject: [PATCH 21/70] Add files via upload
---
src/utils/__init__.py | 35 +++++
src/utils/logging_config.py | 177 +++++++++++++++++++++
src/utils/parser.py | 264 ++++++++++++++++++++++++++++++++
src/utils/response_validator.py | 49 ++++++
src/utils/retry.py | 90 +++++++++++
src/utils/validation.py | 183 ++++++++++++++++++++++
src/utils/zone_manager.py | 174 +++++++++++++++++++++
7 files changed, 972 insertions(+)
create mode 100644 src/utils/__init__.py
create mode 100644 src/utils/logging_config.py
create mode 100644 src/utils/parser.py
create mode 100644 src/utils/response_validator.py
create mode 100644 src/utils/retry.py
create mode 100644 src/utils/validation.py
create mode 100644 src/utils/zone_manager.py
diff --git a/src/utils/__init__.py b/src/utils/__init__.py
new file mode 100644
index 0000000..75a2f6c
--- /dev/null
+++ b/src/utils/__init__.py
@@ -0,0 +1,35 @@
+from .validation import (
+ validate_url, validate_zone_name, validate_country_code,
+ validate_timeout, validate_max_workers, validate_url_list,
+ validate_search_engine, validate_query, validate_response_format,
+ validate_http_method
+)
+from .retry import retry_request
+from .zone_manager import ZoneManager
+from .logging_config import setup_logging, get_logger, log_request
+from .response_validator import safe_json_parse, validate_response_size, check_response_not_empty
+from .parser import parse_content, parse_multiple, extract_structured_data
+
+__all__ = [
+ 'validate_url',
+ 'validate_zone_name',
+ 'validate_country_code',
+ 'validate_timeout',
+ 'validate_max_workers',
+ 'validate_url_list',
+ 'validate_search_engine',
+ 'validate_query',
+ 'validate_response_format',
+ 'validate_http_method',
+ 'retry_request',
+ 'ZoneManager',
+ 'setup_logging',
+ 'get_logger',
+ 'log_request',
+ 'safe_json_parse',
+ 'validate_response_size',
+ 'check_response_not_empty',
+ 'parse_content',
+ 'parse_multiple',
+ 'extract_structured_data'
+]
\ No newline at end of file
diff --git a/src/utils/logging_config.py b/src/utils/logging_config.py
new file mode 100644
index 0000000..89289da
--- /dev/null
+++ b/src/utils/logging_config.py
@@ -0,0 +1,177 @@
+"""
+Structured logging configuration for Bright Data SDK
+"""
+import logging
+import json
+import time
+from typing import Dict, Any
+import uuid
+
+
+class StructuredFormatter(logging.Formatter):
+ """Custom formatter that outputs structured JSON logs"""
+
+ def __init__(self):
+ super().__init__()
+ self.start_time = time.time()
+
+ def format(self, record):
+ log_data = {
+ 'timestamp': self.formatTime(record),
+ 'level': record.levelname,
+ 'logger': record.name,
+ 'message': record.getMessage(),
+ 'module': record.module,
+ 'function': record.funcName,
+ 'line': record.lineno
+ }
+
+ correlation_id = getattr(record, 'correlation_id', None)
+ if correlation_id:
+ log_data['correlation_id'] = correlation_id
+
+ if hasattr(record, 'url'):
+ log_data['url'] = record.url
+ if hasattr(record, 'method'):
+ log_data['method'] = record.method
+ if hasattr(record, 'status_code'):
+ log_data['status_code'] = record.status_code
+ if hasattr(record, 'response_time'):
+ log_data['response_time_ms'] = record.response_time
+
+ if record.exc_info:
+ log_data['exception'] = {
+ 'type': record.exc_info[0].__name__ if record.exc_info[0] else None,
+ 'message': str(record.exc_info[1]) if record.exc_info[1] else None,
+ 'traceback': self.formatException(record.exc_info)
+ }
+
+ log_data = self._sanitize_log_data(log_data)
+
+ return json.dumps(log_data, default=str)
+
+ def _sanitize_log_data(self, log_data: Dict[str, Any]) -> Dict[str, Any]:
+ """Remove or mask sensitive information from log data"""
+ sensitive_keys = ['authorization', 'token', 'api_token', 'password', 'secret']
+
+ def sanitize_value(key: str, value: Any) -> Any:
+ if isinstance(key, str) and any(sensitive in key.lower() for sensitive in sensitive_keys):
+ return "***REDACTED***"
+ elif isinstance(value, str) and len(value) > 20:
+ if value.isalnum() and len(value) > 32:
+ return f"{value[:8]}***REDACTED***{value[-4:]}"
+ return value
+
+ def recursive_sanitize(obj):
+ if isinstance(obj, dict):
+ return {k: recursive_sanitize(sanitize_value(k, v)) for k, v in obj.items()}
+ elif isinstance(obj, list):
+ return [recursive_sanitize(item) for item in obj]
+ else:
+ return obj
+
+ return recursive_sanitize(log_data)
+
+
+def setup_logging(level: str = "INFO", structured: bool = True, verbose: bool = True) -> None:
+ """
+ Setup logging configuration for the SDK
+
+ Args:
+ level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+ structured: Whether to use structured JSON logging
+ verbose: Whether to show verbose logging (default: True)
+ When False, only WARNING and above are shown
+ When True, uses the specified level
+ """
+ if not verbose:
+ log_level = logging.WARNING
+ else:
+ log_level = getattr(logging, level.upper(), logging.INFO)
+
+ root_logger = logging.getLogger('brightdata')
+ root_logger.handlers.clear()
+
+ handler = logging.StreamHandler()
+ handler.setLevel(log_level)
+
+ if structured:
+ formatter = StructuredFormatter()
+ else:
+ formatter = logging.Formatter(
+ '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ )
+
+ handler.setFormatter(formatter)
+ root_logger.addHandler(handler)
+ root_logger.setLevel(log_level)
+
+ root_logger.propagate = False
+
+
+def get_logger(name: str) -> logging.Logger:
+ """
+ Get a logger instance with the specified name
+
+ Args:
+ name: Logger name
+
+ Returns:
+ Configured logger instance
+ """
+ return logging.getLogger(f'brightdata.{name}')
+
+
+def log_request(logger: logging.Logger, method: str, url: str,
+ status_code: int = None, response_time: float = None,
+ correlation_id: str = None) -> None:
+ """
+ Log HTTP request details
+
+ Args:
+ logger: Logger instance
+ method: HTTP method
+ url: Request URL (will be sanitized)
+ status_code: HTTP response status code
+ response_time: Response time in milliseconds
+ correlation_id: Request correlation ID
+ """
+ extra = {
+ 'method': method,
+ 'url': _sanitize_url(url),
+ 'correlation_id': correlation_id or str(uuid.uuid4())
+ }
+
+ if status_code is not None:
+ extra['status_code'] = status_code
+ if response_time is not None:
+ extra['response_time'] = response_time
+
+ if status_code and status_code >= 400:
+ logger.error(f"HTTP request failed: {method} {_sanitize_url(url)}", extra=extra)
+ else:
+ logger.info(f"HTTP request: {method} {_sanitize_url(url)}", extra=extra)
+
+
+def _sanitize_url(url: str) -> str:
+ """Sanitize URL to remove sensitive query parameters"""
+ try:
+ from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
+
+ parsed = urlparse(url)
+ query_params = parse_qs(parsed.query)
+
+ sensitive_params = ['token', 'api_key', 'secret', 'password']
+ for param in sensitive_params:
+ if param in query_params:
+ query_params[param] = ['***REDACTED***']
+
+ sanitized_query = urlencode(query_params, doseq=True)
+ sanitized = urlunparse((
+ parsed.scheme, parsed.netloc, parsed.path,
+ parsed.params, sanitized_query, parsed.fragment
+ ))
+
+ return sanitized
+ except Exception:
+ return url.split('?')[0] + ('?***PARAMS_REDACTED***' if '?' in url else '')
\ No newline at end of file
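A short usage sketch for the logging helpers above, assuming the module is importable as brightdata.utils.logging_config once the package is installed:

```python
# Minimal sketch: structured JSON logging with sanitized request details.
# The import path is an assumption based on this repository layout (src/utils).
from brightdata.utils.logging_config import setup_logging, get_logger, log_request

setup_logging(level="DEBUG", structured=True)  # JSON-formatted log records
logger = get_logger("example")

# Emits a structured record; the token query parameter is redacted by _sanitize_url.
log_request(logger, "GET", "https://example.com/data?token=secret123",
            status_code=200, response_time=123.4)
```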
diff --git a/src/utils/parser.py b/src/utils/parser.py
new file mode 100644
index 0000000..686ad39
--- /dev/null
+++ b/src/utils/parser.py
@@ -0,0 +1,264 @@
+"""
+Content parsing utilities for Bright Data SDK responses
+
+Provides functions to extract and parse content from scraping and search results.
+"""
+import json
+import re
+from typing import Any, Dict, List, Union, Optional
+
+from bs4 import BeautifulSoup
+
+
+def parse_content(data: Union[str, Dict, List], extract_text: bool = True, extract_links: bool = False, extract_images: bool = False) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
+ """
+ Parse content from Bright Data API responses
+
+ Automatically detects and handles both single and multiple results from scrape/search operations.
+ Can be used as a standalone function or called from the client.
+
+ Args:
+ data: Response data from scrape() or search() - can be JSON dict/list or HTML string
+ extract_text: Extract clean text content (default: True)
+ extract_links: Extract all links from content (default: False)
+ extract_images: Extract image URLs from content (default: False)
+
+ Returns:
+ Dict containing parsed content for single results, or List[Dict] for multiple results with keys:
+ - 'type': 'json' or 'html'
+ - 'text': Cleaned text content (if extract_text=True)
+ - 'links': List of extracted links (if extract_links=True)
+ - 'images': List of image URLs (if extract_images=True)
+ - 'title': Page title (if available)
+ - 'raw_length': Length of original content
+ - 'structured_data': Original JSON data (if type='json')
+ """
+ if _is_multiple_results(data):
+ return parse_multiple(data, extract_text=extract_text, extract_links=extract_links, extract_images=extract_images)
+
+ return _parse_single_content(data, extract_text, extract_links, extract_images)
+
+
+def parse_multiple(data_list: List[Union[str, Dict]], extract_text: bool = True, extract_links: bool = False, extract_images: bool = False) -> List[Dict[str, Any]]:
+ """
+ Parse multiple content items (useful for batch scraping results)
+
+ Args:
+ data_list: List of response data items
+ extract_text: Extract clean text content (default: True)
+ extract_links: Extract all links from content (default: False)
+ extract_images: Extract image URLs from content (default: False)
+
+ Returns:
+ List of parsed content dictionaries
+ """
+ if not isinstance(data_list, list):
+ return []
+
+ return [_parse_single_content(item, extract_text, extract_links, extract_images) for item in data_list]
+
+
+def _is_multiple_results(data: Union[str, Dict, List]) -> bool:
+ """
+ Detect if data contains multiple scraping/search results
+
+ Args:
+ data: Response data to analyze
+
+ Returns:
+ True if data appears to be multiple results, False otherwise
+ """
+ if not isinstance(data, list):
+ return False
+
+ if len(data) <= 1:
+ return False
+
+ multiple_result_indicators = 0
+
+ for item in data[:3]:
+ if isinstance(item, dict):
+ common_keys = {'html', 'body', 'content', 'page_html', 'raw_html', 'url', 'status_code'}
+ if any(key in item for key in common_keys):
+ multiple_result_indicators += 1
+ elif isinstance(item, str) and len(item) > 100:
+            if '<html' in item.lower() or '<body' in item.lower():
+                multiple_result_indicators += 1
+
+    return multiple_result_indicators >= 2
+
+
+def _parse_single_content(data: Union[str, Dict, List], extract_text: bool = True, extract_links: bool = False, extract_images: bool = False) -> Dict[str, Any]:
+ """
+ Parse single content item from Bright Data API responses
+
+ Args:
+ data: Single response data item - can be JSON dict or HTML string
+ extract_text: Extract clean text content (default: True)
+ extract_links: Extract all links from content (default: False)
+ extract_images: Extract image URLs from content (default: False)
+
+ Returns:
+ Dict containing parsed content
+ """
+ result = {
+ 'type': None,
+ 'raw_length': 0,
+ 'title': None
+ }
+
+ if data is None:
+ return result
+
+ if isinstance(data, (dict, list)):
+ result['type'] = 'json'
+ result['structured_data'] = data
+ result['raw_length'] = len(str(data))
+
+ html_content = _extract_html_from_json(data)
+ if html_content and (extract_text or extract_links or extract_images):
+ _parse_html_content(html_content, result, extract_text, extract_links, extract_images)
+
+ result['title'] = _extract_title_from_json(data)
+
+ elif isinstance(data, str):
+ result['type'] = 'html'
+ result['raw_length'] = len(data)
+
+ if extract_text or extract_links or extract_images:
+ _parse_html_content(data, result, extract_text, extract_links, extract_images)
+
+ return result
+
+
+def extract_structured_data(data: Union[str, Dict, List]) -> Optional[Dict]:
+ """
+ Extract structured data (JSON-LD, microdata) from content
+
+ Args:
+ data: Response data
+
+ Returns:
+ Structured data if found, None otherwise
+ """
+ html_content = None
+
+ if isinstance(data, str):
+ html_content = data
+ elif isinstance(data, (dict, list)):
+ html_content = _extract_html_from_json(data)
+
+ if not html_content:
+ return None
+
+ try:
+ soup = BeautifulSoup(html_content, 'html.parser')
+
+ scripts = soup.find_all('script', type='application/ld+json')
+ if scripts:
+ structured_data = []
+ for script in scripts:
+ try:
+ data = json.loads(script.string)
+ structured_data.append(data)
+ except json.JSONDecodeError:
+ continue
+ if structured_data:
+ return {'json_ld': structured_data}
+
+ except Exception:
+ pass
+
+ return None
+
+
+def _extract_html_from_json(data: Union[Dict, List]) -> Optional[str]:
+ """Extract HTML content from JSON response structure"""
+ if isinstance(data, dict):
+ html_keys = ['html', 'body', 'content', 'page_html', 'raw_html']
+ for key in html_keys:
+ if key in data and isinstance(data[key], str):
+ return data[key]
+
+ for value in data.values():
+ if isinstance(value, (dict, list)):
+ html = _extract_html_from_json(value)
+ if html:
+ return html
+
+ elif isinstance(data, list):
+ for item in data:
+ if isinstance(item, (dict, list)):
+ html = _extract_html_from_json(item)
+ if html:
+ return html
+
+ return None
+
+
+def _extract_title_from_json(data: Union[Dict, List]) -> Optional[str]:
+ """Extract title from JSON response structure"""
+ if isinstance(data, dict):
+ title_keys = ['title', 'page_title', 'name']
+ for key in title_keys:
+ if key in data and isinstance(data[key], str):
+ return data[key].strip()
+
+ for value in data.values():
+ if isinstance(value, (dict, list)):
+ title = _extract_title_from_json(value)
+ if title:
+ return title
+
+ elif isinstance(data, list):
+ for item in data:
+ if isinstance(item, (dict, list)):
+ title = _extract_title_from_json(item)
+ if title:
+ return title
+
+ return None
+
+
+def _parse_html_content(html: str, result: Dict, extract_text: bool, extract_links: bool, extract_images: bool):
+ """Parse HTML content and update result dictionary"""
+ try:
+ soup = BeautifulSoup(html, 'html.parser')
+
+ if not result.get('title'):
+ title_tag = soup.find('title')
+ if title_tag:
+ result['title'] = title_tag.get_text().strip()
+
+ if extract_text:
+ for script in soup(["script", "style"]):
+ script.decompose()
+
+ text = soup.get_text()
+ lines = (line.strip() for line in text.splitlines())
+ chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
+ result['text'] = '\n'.join(chunk for chunk in chunks if chunk)
+
+ if extract_links:
+ links = []
+ for a_tag in soup.find_all('a', href=True):
+ href = a_tag['href']
+ text = a_tag.get_text().strip()
+ links.append({'url': href, 'text': text})
+ result['links'] = links
+
+ if extract_images:
+ images = []
+ for img_tag in soup.find_all('img', src=True):
+ src = img_tag['src']
+ alt = img_tag.get('alt', '').strip()
+ images.append({'url': src, 'alt': alt})
+ result['images'] = images
+
+ except Exception as e:
+ if extract_text:
+ result['text'] = f"HTML parsing failed: {str(e)}"
+ if extract_links:
+ result['links'] = []
+ if extract_images:
+ result['images'] = []
\ No newline at end of file
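A short usage sketch for parse_content, with the output keys following the docstring above; the import path is an assumption based on this repository layout:

```python
# Minimal sketch: parse a raw HTML response and extract text plus links.
from brightdata.utils.parser import parse_content  # assumed install path

html = "<html><head><title>Demo</title></head><body><a href='/x'>link</a>Hello</body></html>"
parsed = parse_content(html, extract_text=True, extract_links=True)

print(parsed["type"])    # 'html'
print(parsed["title"])   # 'Demo'
print(parsed["links"])   # [{'url': '/x', 'text': 'link'}]
print(parsed["text"])    # cleaned text content
```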
diff --git a/src/utils/response_validator.py b/src/utils/response_validator.py
new file mode 100644
index 0000000..83a9aa7
--- /dev/null
+++ b/src/utils/response_validator.py
@@ -0,0 +1,49 @@
+"""
+Minimal response validation utilities for Bright Data SDK
+"""
+import json
+from typing import Any, Dict, Union
+from ..exceptions import ValidationError
+
+
+def safe_json_parse(response_text: str) -> Union[Dict[str, Any], str]:
+ """
+ Safely parse JSON response with minimal validation
+
+ Args:
+ response_text: Raw response text from API
+
+ Returns:
+ Parsed JSON data or original text if parsing fails
+ """
+ if not response_text:
+ return {}
+
+ try:
+ return json.loads(response_text)
+ except (json.JSONDecodeError, TypeError):
+ # Return original text if JSON parsing fails
+ return response_text
+
+
+def validate_response_size(response_text: str, max_size_mb: float = 100.0) -> None:
+ """
+ Quick size check to prevent memory issues
+
+ Args:
+ response_text: Response text to validate
+ max_size_mb: Maximum allowed size in megabytes
+ """
+ if response_text and len(response_text) > (max_size_mb * 1024 * 1024):
+ raise ValidationError(f"Response too large (>{max_size_mb}MB)")
+
+
+def check_response_not_empty(data: Any) -> None:
+ """
+ Minimal check that response contains data
+
+ Args:
+ data: Response data to check
+ """
+ if data is None or (isinstance(data, str) and len(data.strip()) == 0):
+ raise ValidationError("Empty response received")
\ No newline at end of file
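A short usage sketch for the response validators above (import path assumed from the repository layout):

```python
# Minimal sketch: size/emptiness checks followed by tolerant JSON parsing.
from brightdata.utils.response_validator import (
    safe_json_parse, validate_response_size, check_response_not_empty
)

raw = '{"status": "ok"}'
validate_response_size(raw, max_size_mb=1.0)  # raises ValidationError if oversized
check_response_not_empty(raw)                 # raises ValidationError if empty
data = safe_json_parse(raw)                   # dict on success, original text otherwise
print(data)
```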
diff --git a/src/utils/retry.py b/src/utils/retry.py
new file mode 100644
index 0000000..361645a
--- /dev/null
+++ b/src/utils/retry.py
@@ -0,0 +1,90 @@
+import time
+import random
+import requests
+from functools import wraps
+from ..exceptions import NetworkError, APIError
+
+
+def retry_request(max_retries=3, backoff_factor=1.5, retry_statuses=None, max_backoff=60):
+ """
+ Decorator for retrying requests with exponential backoff and jitter
+
+ Args:
+ max_retries: Maximum number of retry attempts
+ backoff_factor: Exponential backoff multiplier
+ retry_statuses: HTTP status codes that should trigger retries
+ max_backoff: Maximum backoff time in seconds
+ """
+ if retry_statuses is None:
+ retry_statuses = {429, 500, 502, 503, 504}
+
+ def decorator(func):
+ @wraps(func)
+ def wrapper(*args, **kwargs):
+ last_exception = None
+
+ for attempt in range(max_retries + 1): # +1 to include initial attempt
+ try:
+ response = func(*args, **kwargs)
+
+ # Check if we should retry based on status code
+ if hasattr(response, 'status_code') and response.status_code in retry_statuses:
+ if attempt >= max_retries:
+ raise APIError(
+ f"Server error after {max_retries} retries: HTTP {response.status_code}",
+ status_code=response.status_code,
+ response_text=getattr(response, 'text', '')
+ )
+
+ # Calculate backoff with jitter
+ backoff_time = min(backoff_factor ** attempt, max_backoff)
+ jitter = backoff_time * 0.1 * random.random() # Add up to 10% jitter
+ total_delay = backoff_time + jitter
+
+ time.sleep(total_delay)
+ continue
+
+ return response
+
+ except requests.exceptions.ConnectTimeout as e:
+ last_exception = NetworkError(f"Connection timeout: {str(e)}")
+ except requests.exceptions.ReadTimeout as e:
+ last_exception = NetworkError(f"Read timeout: {str(e)}")
+ except requests.exceptions.Timeout as e:
+ last_exception = NetworkError(f"Request timeout: {str(e)}")
+ except requests.exceptions.ConnectionError as e:
+ # Handle DNS resolution, connection refused, etc.
+ if "Name or service not known" in str(e):
+ last_exception = NetworkError(f"DNS resolution failed: {str(e)}")
+ elif "Connection refused" in str(e):
+ last_exception = NetworkError(f"Connection refused: {str(e)}")
+ else:
+ last_exception = NetworkError(f"Connection error: {str(e)}")
+ except requests.exceptions.SSLError as e:
+ last_exception = NetworkError(f"SSL/TLS error: {str(e)}")
+ except requests.exceptions.ProxyError as e:
+ last_exception = NetworkError(f"Proxy error: {str(e)}")
+ except requests.exceptions.RequestException as e:
+ last_exception = NetworkError(f"Network error: {str(e)}")
+ except Exception as e:
+ # Catch any other unexpected exceptions
+ last_exception = NetworkError(f"Unexpected error: {str(e)}")
+
+ # If this was the last attempt, raise the exception
+ if attempt >= max_retries:
+ raise last_exception
+
+ # Calculate backoff with jitter for network errors
+ backoff_time = min(backoff_factor ** attempt, max_backoff)
+ jitter = backoff_time * 0.1 * random.random()
+ total_delay = backoff_time + jitter
+
+ time.sleep(total_delay)
+
+ # This should never be reached, but just in case
+ if last_exception:
+ raise last_exception
+ return None
+
+ return wrapper
+ return decorator
\ No newline at end of file
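A short usage sketch for the retry_request decorator, wrapping a plain requests call so that retryable status codes and transient network errors are retried with exponential backoff (import path assumed from the repository layout):

```python
# Minimal sketch: retry a GET request on 429/5xx responses and network errors.
import requests
from brightdata.utils.retry import retry_request  # assumed install path

@retry_request(max_retries=3, backoff_factor=1.5)
def fetch(url):
    return requests.get(url, timeout=10)

response = fetch("https://example.com")
print(response.status_code)
```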
diff --git a/src/utils/validation.py b/src/utils/validation.py
new file mode 100644
index 0000000..938cb43
--- /dev/null
+++ b/src/utils/validation.py
@@ -0,0 +1,183 @@
+from urllib.parse import urlparse
+from typing import Union, List
+from ..exceptions import ValidationError
+
+
+def validate_url(url: str) -> None:
+ """Validate URL format with comprehensive checks"""
+ if not isinstance(url, str):
+ raise ValidationError(f"URL must be a string, got {type(url).__name__}")
+
+ if not url.strip():
+ raise ValidationError("URL cannot be empty or whitespace")
+
+ # Check URL length
+ if len(url) > 8192: # Common URL length limit
+ raise ValidationError("URL exceeds maximum length of 8192 characters")
+
+ try:
+ parsed = urlparse(url.strip())
+ if not parsed.scheme:
+ raise ValidationError(f"URL must include a scheme (http/https): {url}")
+ if parsed.scheme.lower() not in ['http', 'https']:
+ raise ValidationError(f"URL scheme must be http or https, got: {parsed.scheme}")
+ if not parsed.netloc:
+ raise ValidationError(f"URL must include a valid domain: {url}")
+ # Check for suspicious characters
+ if any(char in url for char in ['<', '>', '"', "'"]):
+ raise ValidationError("URL contains invalid characters")
+ except Exception as e:
+ if isinstance(e, ValidationError):
+ raise
+ raise ValidationError(f"Invalid URL format '{url}': {str(e)}")
+
+
+def validate_zone_name(zone: str = None) -> None:
+ """Validate zone name format with enhanced checks"""
+ if zone is None:
+ return # Zone can be None (optional parameter)
+
+ if not isinstance(zone, str):
+ raise ValidationError(f"Zone name must be a string, got {type(zone).__name__}")
+
+ zone = zone.strip()
+ if not zone:
+ raise ValidationError("Zone name cannot be empty or whitespace")
+
+ if len(zone) < 3:
+ raise ValidationError("Zone name must be at least 3 characters long")
+
+ if len(zone) > 63:
+ raise ValidationError("Zone name must not exceed 63 characters")
+
+ if not zone.replace('_', '').replace('-', '').isalnum():
+ raise ValidationError("Zone name can only contain letters, numbers, hyphens, and underscores")
+
+ if zone.startswith('-') or zone.endswith('-'):
+ raise ValidationError("Zone name cannot start or end with a hyphen")
+
+ if zone.startswith('_') or zone.endswith('_'):
+ raise ValidationError("Zone name cannot start or end with an underscore")
+
+
+def validate_country_code(country: str) -> None:
+ """Validate ISO country code format"""
+ if not isinstance(country, str):
+ raise ValidationError(f"Country code must be a string, got {type(country).__name__}")
+
+ country = country.strip().lower()
+ if len(country) == 0:
+ return
+
+ if len(country) != 2:
+ raise ValidationError("Country code must be exactly 2 characters (ISO 3166-1 alpha-2) or empty")
+
+ if not country.isalpha():
+ raise ValidationError("Country code must contain only letters")
+
+
+def validate_timeout(timeout: int) -> None:
+ """Validate timeout value"""
+ if timeout is None:
+ return # Timeout can be None (use default)
+
+ if not isinstance(timeout, int):
+ raise ValidationError(f"Timeout must be an integer, got {type(timeout).__name__}")
+
+ if timeout <= 0:
+ raise ValidationError("Timeout must be greater than 0 seconds")
+
+ if timeout > 300: # 5 minutes max
+ raise ValidationError("Timeout cannot exceed 300 seconds (5 minutes)")
+
+
+def validate_max_workers(max_workers: int) -> None:
+ """Validate max_workers parameter"""
+ if max_workers is None:
+ return # Can be None (use default)
+
+ if not isinstance(max_workers, int):
+ raise ValidationError(f"max_workers must be an integer, got {type(max_workers).__name__}")
+
+ if max_workers <= 0:
+ raise ValidationError("max_workers must be greater than 0")
+
+ if max_workers > 50: # Reasonable upper limit
+ raise ValidationError("max_workers cannot exceed 50 (to prevent resource exhaustion)")
+
+
+def validate_url_list(urls: List[str], max_urls: int = 100) -> None:
+ """Validate list of URLs with size limits"""
+ if not isinstance(urls, list):
+ raise ValidationError(f"URL list must be a list, got {type(urls).__name__}")
+
+ if len(urls) == 0:
+ raise ValidationError("URL list cannot be empty")
+
+ if len(urls) > max_urls:
+ raise ValidationError(f"URL list cannot contain more than {max_urls} URLs")
+
+ for i, url in enumerate(urls):
+ try:
+ validate_url(url)
+ except ValidationError as e:
+ raise ValidationError(f"Invalid URL at index {i}: {str(e)}")
+
+
+def validate_search_engine(search_engine: str) -> None:
+ """Validate search engine parameter"""
+ if not isinstance(search_engine, str):
+ raise ValidationError(f"Search engine must be a string, got {type(search_engine).__name__}")
+
+ valid_engines = ['google', 'bing', 'yandex']
+ search_engine = search_engine.strip().lower()
+
+ if search_engine not in valid_engines:
+ raise ValidationError(f"Invalid search engine '{search_engine}'. Valid options: {', '.join(valid_engines)}")
+
+
+def validate_query(query: Union[str, List[str]]) -> None:
+ """Validate search query parameter"""
+ if isinstance(query, str):
+ if not query.strip():
+ raise ValidationError("Search query cannot be empty or whitespace")
+ if len(query) > 2048:
+ raise ValidationError("Search query cannot exceed 2048 characters")
+ elif isinstance(query, list):
+ if len(query) == 0:
+ raise ValidationError("Query list cannot be empty")
+ if len(query) > 50: # Reasonable limit
+ raise ValidationError("Query list cannot contain more than 50 queries")
+ for i, q in enumerate(query):
+ if not isinstance(q, str):
+ raise ValidationError(f"Query at index {i} must be a string, got {type(q).__name__}")
+ if not q.strip():
+ raise ValidationError(f"Query at index {i} cannot be empty or whitespace")
+ if len(q) > 2048:
+ raise ValidationError(f"Query at index {i} cannot exceed 2048 characters")
+ else:
+ raise ValidationError(f"Query must be a string or list of strings, got {type(query).__name__}")
+
+
+def validate_response_format(response_format: str) -> None:
+ """Validate response format parameter"""
+ if not isinstance(response_format, str):
+ raise ValidationError(f"Response format must be a string, got {type(response_format).__name__}")
+
+ valid_formats = ['json', 'raw']
+ response_format = response_format.strip().lower()
+
+ if response_format not in valid_formats:
+ raise ValidationError(f"Invalid response format '{response_format}'. Valid options: {', '.join(valid_formats)}")
+
+
+def validate_http_method(method: str) -> None:
+ """Validate HTTP method parameter"""
+ if not isinstance(method, str):
+ raise ValidationError(f"HTTP method must be a string, got {type(method).__name__}")
+
+ valid_methods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH']
+ method = method.strip().upper()
+
+ if method not in valid_methods:
+ raise ValidationError(f"Invalid HTTP method '{method}'. Valid options: {', '.join(valid_methods)}")
\ No newline at end of file
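
The validators above are meant to be composed before a request is issued. Below is a minimal usage sketch, assuming the helpers are importable as `brightdata.utils.validation` and that `ValidationError` is exposed by `brightdata.exceptions`; both import paths are inferred from the surrounding patches rather than confirmed by this diff.

```python
from brightdata.utils.validation import (  # assumed module path
    validate_url,
    validate_zone_name,
    validate_country_code,
    validate_timeout,
)
from brightdata.exceptions import ValidationError  # assumed module path


def check_request_params(url: str, zone: str, country: str = "us", timeout: int = 30) -> bool:
    """Run the individual validators and report the first failure, if any."""
    try:
        validate_url(url)               # scheme, domain, and character checks
        validate_zone_name(zone)        # 3-63 chars; letters, digits, hyphen, underscore
        validate_country_code(country)  # empty or 2-letter ISO 3166-1 alpha-2
        validate_timeout(timeout)       # positive integer, at most 300 seconds
        return True
    except ValidationError as exc:
        print(f"Rejected request parameters: {exc}")
        return False


check_request_params("https://example.com/page", "sdk_unlocker", "us", 30)
```
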
diff --git a/src/utils/zone_manager.py b/src/utils/zone_manager.py
new file mode 100644
index 0000000..82a1205
--- /dev/null
+++ b/src/utils/zone_manager.py
@@ -0,0 +1,174 @@
+import requests
+import json
+import logging
+import time
+from ..exceptions import ZoneError, NetworkError, APIError
+from .retry import retry_request
+
+logger = logging.getLogger(__name__)
+
+
+class ZoneManager:
+ """Manages Bright Data zones - creation and validation"""
+
+ def __init__(self, session: requests.Session):
+ self.session = session
+
+ def ensure_required_zones(self, web_unlocker_zone: str, serp_zone: str):
+ """
+ Check if required zones exist and create them if they don't.
+ Raises exceptions on failure instead of silently continuing.
+ """
+ try:
+ logger.info("Checking existing zones...")
+ zones = self._get_zones_with_retry()
+ zone_names = {zone.get('name') for zone in zones}
+ logger.info(f"Found {len(zones)} existing zones")
+
+ zones_to_create = []
+ if web_unlocker_zone not in zone_names:
+ zones_to_create.append((web_unlocker_zone, 'unblocker'))
+ logger.info(f"Need to create web unlocker zone: {web_unlocker_zone}")
+
+ if serp_zone not in zone_names:
+ zones_to_create.append((serp_zone, 'serp'))
+ logger.info(f"Need to create SERP zone: {serp_zone}")
+
+ if not zones_to_create:
+ logger.info("All required zones already exist")
+ return
+
+ for zone_name, zone_type in zones_to_create:
+ logger.info(f"Creating zone: {zone_name} (type: {zone_type})")
+ self._create_zone_with_retry(zone_name, zone_type)
+ logger.info(f"Successfully created zone: {zone_name}")
+
+ self._verify_zones_created([zone[0] for zone in zones_to_create])
+
+ except (ZoneError, NetworkError, APIError):
+ raise
+ except requests.exceptions.RequestException as e:
+ logger.error(f"Network error while ensuring zones exist: {e}")
+ raise NetworkError(f"Failed to ensure zones due to network error: {str(e)}")
+ except json.JSONDecodeError as e:
+ logger.error(f"Invalid JSON response while checking zones: {e}")
+ raise ZoneError(f"Invalid response format from zones API: {str(e)}")
+ except Exception as e:
+ logger.error(f"Unexpected error while ensuring zones exist: {e}")
+ raise ZoneError(f"Unexpected error during zone creation: {str(e)}")
+
+ @retry_request(max_retries=3, backoff_factor=1.5, retry_statuses={429, 500, 502, 503, 504})
+ def _get_zones_with_retry(self):
+ """Get zones list with retry logic for network issues"""
+ response = self.session.get('https://api.brightdata.com/zone/get_active_zones')
+
+ if response.status_code == 200:
+ try:
+ return response.json() or []
+ except json.JSONDecodeError as e:
+ raise ZoneError(f"Invalid JSON response from zones API: {str(e)}")
+ elif response.status_code == 401:
+ raise ZoneError("Unauthorized (401): Check your API token and ensure it has proper permissions")
+ elif response.status_code == 403:
+ raise ZoneError("Forbidden (403): API token lacks sufficient permissions for zone operations")
+ else:
+ raise ZoneError(f"Failed to list zones ({response.status_code}): {response.text}")
+
+ @retry_request(max_retries=3, backoff_factor=1.5, retry_statuses={429, 500, 502, 503, 504})
+ def _create_zone_with_retry(self, zone_name: str, zone_type: str):
+ """
+ Create a new zone in Bright Data with retry logic
+
+ Args:
+ zone_name: Name for the new zone
+ zone_type: Type of zone ('unblocker' or 'serp')
+ """
+ if zone_type == "serp":
+ plan_config = {
+ "type": "unblocker",
+ "serp": True
+ }
+ else:
+ plan_config = {
+ "type": zone_type
+ }
+
+ payload = {
+ "plan": plan_config,
+ "zone": {
+ "name": zone_name,
+ "type": zone_type
+ }
+ }
+
+ response = self.session.post(
+ 'https://api.brightdata.com/zone',
+ json=payload
+ )
+
+ if response.status_code in [200, 201]:
+ logger.info(f"Zone creation successful: {zone_name}")
+ return response
+ elif response.status_code == 409 or "Duplicate zone name" in response.text or "already exists" in response.text.lower():
+ logger.info(f"Zone {zone_name} already exists - this is expected")
+ return response
+ elif response.status_code == 401:
+ raise ZoneError(f"Unauthorized (401): API token invalid or lacks permissions to create zone '{zone_name}'")
+ elif response.status_code == 403:
+ raise ZoneError(f"Forbidden (403): API token lacks permissions to create zone '{zone_name}'. Note: sdk_unlocker and sdk_serp zones should be allowed for all permissions.")
+ elif response.status_code == 400:
+ raise ZoneError(f"Bad request (400) creating zone '{zone_name}': {response.text}")
+ else:
+ raise ZoneError(f"Failed to create zone '{zone_name}' ({response.status_code}): {response.text}")
+
+ def _verify_zones_created(self, zone_names: list):
+ """
+ Verify that zones were successfully created by checking the zones list
+ """
+ max_attempts = 3
+ for attempt in range(max_attempts):
+ try:
+ logger.info(f"Verifying zone creation (attempt {attempt + 1}/{max_attempts})")
+ time.sleep(1)
+
+ zones = self._get_zones_with_retry()
+ existing_zone_names = {zone.get('name') for zone in zones}
+
+ missing_zones = [name for name in zone_names if name not in existing_zone_names]
+
+ if not missing_zones:
+ logger.info("All zones verified successfully")
+ return
+
+ if attempt == max_attempts - 1:
+ raise ZoneError(f"Zone verification failed: zones {missing_zones} not found after creation")
+
+ logger.warning(f"Zones not yet visible: {missing_zones}. Retrying verification...")
+
+ except (ZoneError, NetworkError):
+ if attempt == max_attempts - 1:
+ raise
+ logger.warning(f"Zone verification attempt {attempt + 1} failed, retrying...")
+ time.sleep(2 ** attempt)
+
+ def _create_zone(self, zone_name: str, zone_type: str):
+ """
+ Legacy method - kept for backward compatibility
+ Use _create_zone_with_retry instead for new code
+ """
+ return self._create_zone_with_retry(zone_name, zone_type)
+
+ def list_zones(self):
+ """
+ List all active zones in your Bright Data account
+
+ Returns:
+ List of zone dictionaries with their configurations
+ """
+ try:
+ return self._get_zones_with_retry()
+ except (ZoneError, NetworkError):
+ raise
+ except Exception as e:
+ logger.error(f"Unexpected error listing zones: {e}")
+ raise ZoneError(f"Unexpected error while listing zones: {str(e)}")
\ No newline at end of file
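
As a usage sketch of the ZoneManager above: it only needs an authenticated `requests.Session`. The Bearer-token header, the import paths, and the `"type"` key read from each zone dictionary are assumptions; the default zone names `sdk_unlocker` and `sdk_serp` are taken from the error messages in this patch.

```python
import requests

from brightdata.utils.zone_manager import ZoneManager  # assumed module path
from brightdata.exceptions import ZoneError            # assumed module path

# Authenticated session; the Bearer header scheme is an assumption here.
session = requests.Session()
session.headers.update({
    "Authorization": "Bearer <your-api-token>",  # placeholder token
    "Content-Type": "application/json",
})

manager = ZoneManager(session)

try:
    # Create the SDK's default zones if they are missing, then list what exists.
    manager.ensure_required_zones(web_unlocker_zone="sdk_unlocker", serp_zone="sdk_serp")
    for zone in manager.list_zones():
        print(zone.get("name"), zone.get("type"))  # 'type' key assumed, not shown in this patch
except ZoneError as exc:
    print(f"Zone setup failed: {exc}")
```
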
From cf238011d4bc65db5f8aa7461d7b9ffa14b7f6a1 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:38:01 +0200
Subject: [PATCH 22/70] Create tst
---
src/schemas/tst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 src/schemas/tst
diff --git a/src/schemas/tst b/src/schemas/tst
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/schemas/tst
@@ -0,0 +1 @@
+
From 50f548aafb7e4fd1e5618a906ab7930dfb8d9d7f Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:38:18 +0200
Subject: [PATCH 23/70] Create tst
---
src/types/tst | 1 +
1 file changed, 1 insertion(+)
create mode 100644 src/types/tst
diff --git a/src/types/tst b/src/types/tst
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/types/tst
@@ -0,0 +1 @@
+
From a714d3b55374080ab908a754c056009e9dc1fe91 Mon Sep 17 00:00:00 2001
From: Nadav Toledo <136907680+NadavToledo1@users.noreply.github.com>
Date: Tue, 28 Oct 2025 11:40:44 +0200
Subject: [PATCH 24/70] Add files via upload
---
README.md | 409 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 409 insertions(+)
create mode 100644 README.md
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..04fa2cc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,409 @@
+
+
+
+