In [2]:
import asyncio
import nest_asyncio
from crawl4ai import AsyncWebCrawler

# Apply the patch to allow nested event loops
nest_asyncio.apply()

async def main():
    async with AsyncWebCrawler() as crawler:
        # Fetch a webpage and convert its HTML to Markdown
        result = await crawler.arun("https://example.com")
        print(result.markdown[:300])  # Print first 300 characters

# Now this works even in Jupyter/IPython
asyncio.run(main())


[INIT].... → Crawl4AI 0.5.0.post8
[FETCH]... ↓ https://example.com... | Status: True | Time: 1.05s
[SCRAPE].. ◆ https://example.com... | Time: 0.012s
[COMPLETE] ● https://example.com... | Status: True | Total: 1.08s
# Example Domain
This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.
[More information...](https://www.iana.org/domains/example)



In [3]:
import asyncio
import nest_asyncio
from crawl4ai import AsyncWebCrawler

# Apply the patch to allow nested event loops
nest_asyncio.apply()

async def main():
    async with AsyncWebCrawler() as crawler:
        # Fetch a webpage and convert its HTML to Markdown
        result = await crawler.arun("https://example.com")
        print(result.markdown[:300])  # Print first 300 characters

# Now this works even in Jupyter/IPython
asyncio.run(main())


  return compile(source, filename, mode, flags,


TargetClosedError: BrowserType.launch: Target page, context or browser has been closed

In [2]:
import asyncio
import nest_asyncio
import os
import subprocess
import time
from crawl4ai import AsyncWebCrawler

# Apply nest_asyncio to allow nested event loops
nest_asyncio.apply()

# Function to kill lingering browser processes
def cleanup_browser_processes():
    """Kill any lingering Chromium/Chrome processes"""
    try:
        if os.name == 'posix':  # Linux/Mac
            subprocess.run(['pkill', '-f', 'chromium'], stderr=subprocess.DEVNULL)
            subprocess.run(['pkill', '-f', 'chrome'], stderr=subprocess.DEVNULL)
        elif os.name == 'nt':  # Windows
            subprocess.run(['taskkill', '/F', '/IM', 'chrome.exe'], stderr=subprocess.DEVNULL)
            subprocess.run(['taskkill', '/F', '/IM', 'chromium.exe'], stderr=subprocess.DEVNULL)
        # Give OS time to release resources
        time.sleep(1)
    except Exception as e:
        print(f"Process cleanup warning: {e}")

# Always clean up before starting
cleanup_browser_processes()

async def main():
    try:
        async with AsyncWebCrawler() as crawler:
            # Fetch a webpage and convert its HTML to Markdown
            result = await crawler.arun("https://example.com")
            print(result.markdown[:300])  # Print first 300 characters
    except Exception as e:
        print(f"Error during crawling: {e}")
        # Clean up on error
        cleanup_browser_processes()

# Now this works even in Jupyter/IPython
asyncio.run(main())


Error during crawling: BrowserType.launch: Target page, context or browser has been closed


In [3]:
import asyncio
import nest_asyncio
import uuid
from crawl4ai import AsyncWebCrawler, BrowserConfig

# Apply nest_asyncio
nest_asyncio.apply()

async def main():
    # Create a unique browser instance each time
    browser_config = BrowserConfig(
        browser_type="chromium",
        headless=True,
        # Use a unique browser ID each time
        browser_id=f"browser-{uuid.uuid4()}"
    )
    
    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun("https://example.com")
        print(result.markdown[:300])

asyncio.run(main())


TypeError: BrowserConfig.__init__() got an unexpected keyword argument 'browser_id'

In [1]:
import asyncio
import nest_asyncio
from crawl4ai import AsyncWebCrawler
import time

# Apply the patch to allow nested event loops
nest_asyncio.apply()

async def crawl_once(url="https://example.com"):
    crawler = AsyncWebCrawler()
    try:
        await crawler.start()
        # Fetch a webpage and convert its HTML to Markdown
        result = await crawler.arun(url)
        print(f"Crawl result for {url}:")
        print(result.markdown[:300])  # Print first 300 characters
        return result
    finally:
        # Ensure proper cleanup of all browser resources
        if hasattr(crawler, 'crawler_strategy') and crawler.crawler_strategy:
            if hasattr(crawler.crawler_strategy, 'browser_manager') and crawler.crawler_strategy.browser_manager:
                if hasattr(crawler.crawler_strategy.browser_manager, 'page') and crawler.crawler_strategy.browser_manager.page:
                    try:
                        await crawler.crawler_strategy.browser_manager.page.unroute_all(behavior="ignore_errors")
                    except Exception as e:
                        print(f"Unroute warning: {e}")
        await crawler.close()

async def main():
    print("Starting first crawl...")
    await crawl_once()
    
    # Add a small delay between crawls to ensure complete cleanup
    time.sleep(1)
    
    print("\nStarting second crawl...")
    await crawl_once()

# Run both crawls in sequence
asyncio.run(main())


Starting first crawl...
[INIT].... → Crawl4AI 0.5.0.post8
[FETCH]... ↓ https://example.com... | Status: True | Time: 1.29s
[SCRAPE].. ◆ https://example.com... | Time: 0.006s
[COMPLETE] ● https://example.com... | Status: True | Total: 1.31s
Crawl result for https://example.com:
# Example Domain
This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.
[More information...](https://www.iana.org/domains/example)


Starting second crawl...


TargetClosedError: BrowserType.launch: Target page, context or browser has been closed

In [1]:
import asyncio
import nest_asyncio
from crawl4ai import AsyncWebCrawler, CacheMode, CrawlerRunConfig

# Patch asyncio to allow nested event loops (useful in Jupyter/IPython)
nest_asyncio.apply()

async def crawl_once(url: str = "https://example.com"):
    crawler = AsyncWebCrawler()
    try:
        await crawler.start()
        
        # Optional: Define crawl config, e.g., bypass cache
        run_config = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)
        
        # Run the crawler on the URL
        result = await crawler.arun(url, config=run_config)
        
        # Print first 300 characters of markdown content
        print(f"Crawl result for {url}:\n{result.markdown[:300]}\n")
        
        # If you have access to the page, unroute all routes to avoid TargetClosedError
        # This is a safeguard if you use page.route() in your code (optional)
        if hasattr(crawler, 'crawler_strategy') and crawler.crawler_strategy:
            browser_manager = crawler.crawler_strategy.browser_manager
            if browser_manager and hasattr(browser_manager, 'default_context'):
                context = browser_manager.default_context
                try:
                    await context.unroute_all(behavior="ignore_errors")
                except Exception as e:
                    print(f"Warning during unroute_all: {e}")
    finally:
        # Ensure crawler is properly closed to clean up browser processes
        await crawler.close()

async def main():
    print("Starting first crawl...")
    await crawl_once("https://example.com")
    
    # Small delay to ensure OS-level cleanup (optional but recommended)
    await asyncio.sleep(1)
    
    print("Starting second crawl...")
    await crawl_once("https://example.com")

if __name__ == "__main__":
    asyncio.run(main())


Starting first crawl...
[INIT].... → Crawl4AI 0.5.0.post8
[FETCH]... ↓ https://example.com... | Status: True | Time: 1.11s
[SCRAPE].. ◆ https://example.com... | Time: 0.01s
[COMPLETE] ● https://example.com... | Status: True | Total: 1.14s
Crawl result for https://example.com:
# Example Domain
This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.
[More information...](https://www.iana.org/domains/example)


Starting second crawl...


TargetClosedError: BrowserType.launch: Target page, context or browser has been closed

In [1]:
import asyncio
import nest_asyncio
import os
import subprocess
import time
from crawl4ai import AsyncWebCrawler, CacheMode, CrawlerRunConfig

# Patch asyncio to allow nested event loops
nest_asyncio.apply()

# Function to forcibly kill browser processes
def kill_browser_processes():
    """Kill any lingering browser processes to ensure clean state"""
    try:
        if os.name == 'posix':  # Linux/Mac
            subprocess.run(['pkill', '-f', 'chromium'], stderr=subprocess.DEVNULL)
            subprocess.run(['pkill', '-f', 'chrome'], stderr=subprocess.DEVNULL)
        elif os.name == 'nt':  # Windows
            subprocess.run(['taskkill', '/F', '/IM', 'chrome.exe'], stderr=subprocess.DEVNULL)
            subprocess.run(['taskkill', '/F', '/IM', 'chromium.exe'], stderr=subprocess.DEVNULL)
    except Exception as e:
        print(f"Warning during process cleanup: {e}")
    
    # Give OS time to release resources
    time.sleep(1)

async def crawl_once(url: str = "https://example.com"):
    # Kill any lingering browser processes before starting
    kill_browser_processes()
    
    crawler = AsyncWebCrawler()
    try:
        await crawler.start()
        
        # Define crawl config with bypass cache to avoid stale data
        run_config = CrawlerRunConfig(
            cache_mode=CacheMode.BYPASS,
            page_timeout=30000  # 30 seconds timeout
        )
        
        # Run the crawler on the URL
        result = await crawler.arun(url, config=run_config)
        
        # Print first 300 characters of markdown content
        print(f"Crawl result for {url}:\n{result.markdown[:300]}\n")
        return result
    except Exception as e:
        print(f"Error during crawling: {e}")
        raise
    finally:
        # Ensure crawler is properly closed
        try:
            await crawler.close()
        except Exception as e:
            print(f"Warning during crawler close: {e}")
        
        # Force kill any remaining browser processes
        kill_browser_processes()

async def main():
    print("Starting first crawl...")
    await crawl_once("https://example.com")
    
    print("Starting second crawl...")
    await crawl_once("https://example.com")

if __name__ == "__main__":
    asyncio.run(main())


Starting first crawl...
[INIT].... → Crawl4AI 0.5.0.post8
[FETCH]... ↓ https://example.com... | Status: True | Time: 1.07s
[SCRAPE].. ◆ https://example.com... | Time: 0.01s
[COMPLETE] ● https://example.com... | Status: True | Total: 1.11s
Crawl result for https://example.com:
# Example Domain
This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.
[More information...](https://www.iana.org/domains/example)


Starting second crawl...
Error during crawling: BrowserType.launch: Target page, context or browser has been closed


TargetClosedError: BrowserType.launch: Target page, context or browser has been closed

In [1]:
import asyncio
import nest_asyncio
from crawl4ai import AsyncWebCrawler

nest_asyncio.apply()

async def crawl_once(url):
    crawler = AsyncWebCrawler()
    await crawler.start()
    try:
        result = await crawler.arun(url)
        print(result.markdown[:300])
    finally:
        await crawler.close()

async def main():
    print("First crawl:")
    await crawl_once("https://example.com")
    print("\nSecond crawl:")
    await crawl_once("https://example.com")

asyncio.run(main())


First crawl:
[INIT].... → Crawl4AI 0.5.0.post8
[FETCH]... ↓ https://example.com... | Status: True | Time: 1.23s
[SCRAPE].. ◆ https://example.com... | Time: 0.006s
[COMPLETE] ● https://example.com... | Status: True | Total: 1.26s
# Example Domain
This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.
[More information...](https://www.iana.org/domains/example)


Second crawl:


TargetClosedError: BrowserType.launch: Target page, context or browser has been closed

In [2]:
import asyncio
import nest_asyncio
from crawl4ai import AsyncWebCrawler

nest_asyncio.apply()
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
from crawl4ai.cache_context import CacheMode

async def basic_session_crawl():
    async with AsyncWebCrawler() as crawler:
        session_id = "dynamic_content_session"
        url = "https://example.com/dynamic-content"

        for page in range(3):
            config = CrawlerRunConfig(
                url=url,
                session_id=session_id,
                js_code="document.querySelector('.load-more-button').click();" if page > 0 else None,
                css_selector=".content-item",
                cache_mode=CacheMode.BYPASS
            )

            result = await crawler.arun(config=config)
            print(f"Page {page + 1}: Found {result.extracted_content.count('.content-item')} items")

        await crawler.crawler_strategy.kill_session(session_id)

asyncio.run(basic_session_crawl())

[INIT].... → Crawl4AI 0.5.0.post8


TypeError: AsyncWebCrawler.arun() missing 1 required positional argument: 'url'