In [4]:
import asyncio
import json
import sys
import os
import tempfile
from pathlib import Path
from crawl4ai import AsyncWebCrawler

# Source code of a standalone crawler script, kept as a string so it can be
# written to a temporary file and executed by a separate Python interpreter.
# The script crawls a single documentation URL with AsyncWebCrawler, wraps the
# page markdown plus its URL/metadata in a {"data": [...]} envelope, and prints
# that envelope as JSON on stdout.
# NOTE(review): presumably run out-of-process because asyncio.run() cannot be
# called from a notebook's already-running event loop -- confirm.
script_content = """
import asyncio
import json
import sys
from crawl4ai import AsyncWebCrawler

async def main():
    async with AsyncWebCrawler(verbose=False) as crawler:
        result = await crawler.arun(
            url="https://docs.llamaindex.ai/en/stable/understanding/",
            bypass_cache=True
        )

        # Extract and structure the data
        data_res = {
            "data": [{
                "text": result.markdown,
                "meta": {
                    "url": result.url,
                    "meta": result.metadata
                }
            }]
        }

        # Print ONLY the JSON to stdout
        print(json.dumps(data_res), flush=True)

if __name__ == "__main__":
    asyncio.run(main())
"""

# Write the script to a temporary .py file. delete=False keeps the file on disk
# after the handle is closed so that another interpreter can open it; it must
# therefore be removed explicitly once it has been executed.
# The encoding is pinned to UTF-8 because Python reads source files as UTF-8,
# whereas text-mode files otherwise default to the platform locale encoding.
with tempfile.NamedTemporaryFile(
    mode="w", suffix=".py", delete=False, encoding="utf-8"
) as f:
    f.write(script_content)
    script_path = Path(f.name)

# Execute the temporary crawler script in a child interpreter, parse the JSON
# it prints, show a short preview, and save the result to crawled_data.json.
# A subprocess.TimeoutExpired (child running longer than 120 s) is not caught
# here and propagates; the `finally` clause still removes the temporary script.
try:
    # Run the script
    import subprocess

    result = subprocess.run(
        [sys.executable, str(script_path)],
        capture_output=True,
        text=True,
        timeout=120,
        # PYTHONUNBUFFERED makes the child's stdout/stderr unbuffered.
        env={**os.environ, "PYTHONUNBUFFERED": "1"},
    )

    if result.returncode == 0:
        # Anything the child prints before the payload is ignored: only the
        # last line of stdout is treated as the JSON document.
        json_line = result.stdout.strip().rsplit("\n", 1)[-1]

        try:
            data_res = json.loads(json_line)
        except json.JSONDecodeError as exc:
            # The child exited cleanly but produced no usable JSON (e.g. empty
            # stdout); report it like any other failure instead of raising.
            print("Error: could not parse crawler output as JSON:", exc)
            print("Error:", result.stderr)
        else:
            page = data_res['data'][0]
            print("Successfully crawled!")
            print(f"Number of results: {len(data_res['data'])}")
            print(f"Text length: {len(page['text'])} characters")
            print("-"*80)
            # After the successful crawl, you can access:
            print(page['text'][:1500])  # First 1500 characters
            print(page['meta']['url'])  # URL

            # Persist the full result; ensure_ascii=False keeps non-ASCII text
            # readable in the UTF-8 encoded file.
            with open('crawled_data.json', 'w', encoding='utf-8') as f:
                json.dump(data_res, f, indent=2, ensure_ascii=False)
    else:
        print("Error:", result.stderr)
finally:
    # Clean up
    script_path.unlink()


Successfully crawled!
Number of results: 1
Text length: 80077 characters
--------------------------------------------------------------------------------
[Skip to content](https://developers.llamaindex.ai/python/framework/understanding/#_top)
[ ![](https://developers.llamaindex.ai/python/_astro/llamaindex-light.BJap_D_H.svg) ![](https://developers.llamaindex.ai/python/_astro/llamaindex-dark.BpGWuI4l.svg) LlamaIndex Python Documentation  ](https://developers.llamaindex.ai/)
Search ` `Ctrl``K` `
Cancel 
Clear
[TypeScript](https://developers.llamaindex.ai/typescript/framework/)
[Twitter](https://x.com/llama_index)[LinkedIn](https://www.linkedin.com/company/llamaindex)[Bluesky](https://bsky.app/profile/llamaindex.bsky.social)[GitHub](https://github.com/run-llama/llama_index/)
Select theme Dark Light Auto
  * LlamaCloud
    * [ Welcome to LlamaCloud ](https://developers.llamaindex.ai/python/cloud/)
    * Parse
      * [ Overview of LlamaParse ](https://developers.llamaindex.ai/python/cloud/