# Adaptive Crawling

In [1]:
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, BrowserConfig
from IPython.display import Markdown, display, HTML
import pprint

def printmd(content):
    display(Markdown(content))
    
def printhtml(content):
    display(HTML(content))
    
def pretty_print(content):
    pprint.pp(content)
    
sample_url="https://loigiaihay.com/de-thi-vao-lop-6-mon-toan-truong-cau-giay-nam-2023-a142098.html"


## Basic usage

In [3]:
from crawl4ai import AdaptiveCrawler
async with AsyncWebCrawler() as crawler:
    adaptive = AdaptiveCrawler(crawler)
    result = await adaptive.digest(
        start_url=sample_url,
        query="Lời giải chi tiết đề thi vào lớp 6 môn toán"
    )
    

In [4]:
adaptive.print_stats()

In [5]:
relevant_pages = adaptive.get_relevant_content(top_k=5)
relevant_pages

[{'url': 'https://loigiaihay.com/de-thi-vao-lop-6-mon-toan-truong-cau-giay-nam-2023-a142098.html',
  'score': 1.0,
  'content': '[ ![](https://loigiaihay.com/themes/style/images/logolgh.png) ](https://loigiaihay.com/ "loigiaihay.com")\n  * [Lớp 12](https://loigiaihay.com/lop-12.html)\n    * ![](https://loigiaihay.com/themes/style/images/i-van.png) Ngữ văn 12\n      * [Soạn văn - Kết nối tri thức](https://loigiaihay.com/soan-van-12-ket-noi-tri-thuc-c1737.html "Soạn văn - Kết nối tri thức")\n      * [Soạn văn - Cánh diều](https://loigiaihay.com/soan-van-12-canh-dieu-c1738.html "Soạn văn - Cánh diều")\n      * [Soạn văn - Chân trời sáng tạo](https://loigiaihay.com/soan-van-12-chan-troi-sang-tao-c1739.html "Soạn văn - Chân trời sáng tạo")\n      * [Tác giả tác phẩm](https://loigiaihay.com/tac-gia-tac-pham-lop-12-c1851.html "Tác giả tác phẩm")\n      * [Tóm tắt, bố cục Văn - Kết nối tri thức](https://loigiaihay.com/tom-tat-bo-cuc-van-12-ket-noi-tri-thuc-c1848.html "Tóm tắt, bố cục Văn - Kết

In [6]:
for page in relevant_pages:
    print(f"- {page['url']} (score: {page['score']:.2f})")

- https://loigiaihay.com/de-thi-vao-lop-6-mon-toan-truong-cau-giay-nam-2023-a142098.html (score: 1.00)


## Config options

In [7]:
from crawl4ai import AdaptiveConfig

config = AdaptiveConfig(
    confidence_threshold=0.7,    # Stop when 70% confident (default: 0.8)
    max_pages=20,               # Maximum pages to crawl (default: 50)
    top_k_links=3,              # Links to follow per page (default: 5)
    min_gain_threshold=0.05     # Minimum expected gain to continue (default: 0.1)
)

async with AsyncWebCrawler() as crawler:
    adaptive = AdaptiveCrawler(crawler, config)
    result = await adaptive.digest(
        start_url=sample_url,
        query="Lời giải chi tiết đề thi vào lớp 6 môn toán"
    )

In [8]:
relevant_pages = adaptive.get_relevant_content(top_k=5)
for page in relevant_pages:
    print(f"- {page['url']} (score: {page['score']:.2f})")

- https://loigiaihay.com/de-thi-vao-lop-6-mon-toan-truong-cau-giay-nam-2023-a142098.html (score: 1.00)
