# Scraper Demo: Crawl4AI + Polars

This document demonstrates the scraping pipeline for a few sample URLs using the project modules.

## 1. Extract Content URLs

In [33]:
import asyncio
from crawl4ai import AsyncWebCrawler
from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig

async def main():
    """Crawl the knowledge-base index page and print its markdown.

    Fetches a single page with a default browser configuration and with
    crawl4ai's cache enabled. On success the page's markdown is printed;
    on failure the error message reported by the crawler is printed instead.
    """
    # Imported locally because this cell's import lines do not bring
    # CacheMode into scope.
    from crawl4ai import CacheMode

    browser_config = BrowserConfig()  # Default browser configuration
    run_config = CrawlerRunConfig(cache_mode=CacheMode.ENABLED)

    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(
            url="https://www.versnellingsplan.nl/kennisbank/",
            config=run_config,
        )

    # Guard clause: report the failure instead of silently doing nothing.
    if not result.success:
        print(f"Crawl failed: {result.error_message}")
        return

    print(result.markdown)  # Clean markdown content



In [40]:
import asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai.deep_crawling import BFSDeepCrawlStrategy
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy
from crawl4ai.deep_crawling.filters import FilterChain, URLPatternFilter

async def main():
    """Deep-crawl the knowledge base and print a short summary.

    Runs a breadth-first deep crawl from the knowledge-base index page,
    restricted by a URL pattern filter, and prints the total number of
    crawled pages followed by the URL and depth of the first three results.
    """
    # Restrict the crawl to URLs under the knowledge-base section.
    # NOTE(review): as a regex, the trailing "/*" means "zero or more
    # slashes", not "anything after the slash" -- confirm this is intended.
    url_filter = URLPatternFilter(
        patterns=r"^https://versnellingsplan\.nl/kennisbank/*"
    )

    # max_depth=1: the start page plus one level of links below it,
    # capped at 5 pages and limited to the same domain.
    config = CrawlerRunConfig(
        deep_crawl_strategy=BFSDeepCrawlStrategy(
            max_depth=1,
            filter_chain=FilterChain([url_filter]),
            include_external=False,
            max_pages=5,
        ),
        scraping_strategy=LXMLWebScrapingStrategy(),
        verbose=True,
    )

    async with AsyncWebCrawler() as crawler:
        results = await crawler.arun(
            "https://versnellingsplan.nl/kennisbank/", config=config
        )

        print(f"Crawled {len(results)} pages in total")

        # Access individual results
        for result in results[:3]:  # Show first 3 results
            print(f"URL: {result.url}")
            print(f"Depth: {result.metadata.get('depth', 0)}")


In [34]:
# Top-level `await` is only valid inside an IPython/Jupyter cell. This runs
# whichever `main` coroutine function was most recently defined in the session.
await main()