## Block 1: Setup and Imports

In [10]:
import sys
import asyncio
from concurrent.futures import ThreadPoolExecutor
from functools import wraps

# Setup for Windows Jupyter: select the proactor event loop policy so that
# loops created later with asyncio.new_event_loop() (as run_in_proactor_loop
# does) are proactor loops.
# NOTE(review): presumably required because Playwright starts the browser as a
# subprocess, which needs the proactor loop on Windows — confirm.
if sys.platform == 'win32':
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

import nest_asyncio
# NOTE(review): nest_asyncio is expected to patch asyncio so the notebook's
# already-running loop tolerates nested run_until_complete() calls — confirm
# against the nest_asyncio documentation.
nest_asyncio.apply()

# Helper + decorator for running a coroutine on its own event loop in a worker
# thread. The loop comes from asyncio.new_event_loop(), so its type follows the
# active event loop policy (a proactor loop on Windows once that policy is set).
def _drain_pending_tasks(loop, timeout=1.0):
    """Let tasks still pending on ``loop`` finish, cancelling any that overrun.

    Args:
        loop: A non-running event loop that owns the tasks.
        timeout: Seconds to wait for the tasks to finish on their own before
            they are cancelled.
    """
    pending = [task for task in asyncio.all_tasks(loop) if not task.done()]
    if not pending:
        return
    try:
        # Give background tasks (like litellm logging) time to complete.
        loop.run_until_complete(
            asyncio.wait_for(
                asyncio.gather(*pending, return_exceptions=True),
                timeout=timeout,
            )
        )
    except (asyncio.TimeoutError, RuntimeError):
        # Cancel whatever did not complete in time ...
        for task in pending:
            if not task.done():
                task.cancel()
        # ... and give the cancellations a chance to be processed.
        try:
            loop.run_until_complete(
                asyncio.gather(*pending, return_exceptions=True)
            )
        except Exception:
            pass  # Best effort: cleanup must not fail the call.


def run_in_proactor_loop(coro_func):
    """Decorate ``coro_func`` so each call runs on a fresh loop in its own thread.

    The returned wrapper is itself a coroutine function: awaiting it starts a
    single-use worker thread, creates a new event loop there, runs
    ``coro_func(*args, **kwargs)`` to completion and hands back its result.

    Args:
        coro_func: The coroutine function to wrap.

    Returns:
        An async wrapper with the same call signature and metadata
        (via ``functools.wraps``).

    Raises:
        Whatever ``coro_func`` raises; the exception is re-raised in the
        awaiting caller.
    """
    @wraps(coro_func)
    async def wrapper(*args, **kwargs):
        def run_in_thread():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                return loop.run_until_complete(coro_func(*args, **kwargs))
            finally:
                # Drain background tasks on success AND on failure, so the
                # loop is never closed with tasks still pending.
                try:
                    _drain_pending_tasks(loop)
                except Exception:
                    pass  # Never mask the wrapped coroutine's own outcome.
                try:
                    loop.close()
                except Exception:
                    pass  # Ignore errors during close

        executor = ThreadPoolExecutor(max_workers=1)
        try:
            # We are inside a coroutine, so a running loop is guaranteed.
            caller_loop = asyncio.get_running_loop()
            return await caller_loop.run_in_executor(executor, run_in_thread)
        finally:
            # Release the worker thread explicitly instead of relying on
            # garbage collection; wait=False keeps the caller's loop unblocked.
            executor.shutdown(wait=False)

    return wrapper

# NOTE(review): this message is printed unconditionally, including on
# non-Windows platforms where the event loop policy is left unchanged.
print("✓ Windows Playwright support enabled")

# Now import everything else
import json
from datetime import datetime, timedelta
from typing import Dict, Any, Optional, List
from playground.browser_tools_v12.agent import ScrapingAgent
from playground.browser_tools_v12 import core

✓ Windows Playwright support enabled


## Block 2: Configuration

In [2]:
# Configuration
# Page to crawl; read as a global by the crawl cells further down.
url = "https://rollcall.com/factbase/trump/search/"
# Date window as YYYY-MM-DD strings.
# NOTE(review): neither bound is referenced by the cells visible in this
# section — presumably consumed by later date filtering; confirm.
START_DATE = "2026-01-06"
END_DATE = "2026-01-10"

In [3]:
# No page action for this load; the same value is also passed to
# core._get_delay_for_action() when the run config is built.
action = None
# The returned pair is forwarded unchanged to CrawlerRunConfig as
# js_code / virtual_scroll_config.
js_code, virtual_scroll_config = core._prepare_action(action)

In [4]:
# Gather the crawl settings in one mapping, then build the run config from it.
_run_config_kwargs = {
    "cache_mode": core.CacheMode.BYPASS,
    "simulate_user": True,
    "verbose": False,
    "page_timeout": 30000,  # presumably milliseconds — TODO confirm
    "remove_overlay_elements": True,
    "word_count_threshold": 100,
    "markdown_generator": core.DefaultMarkdownGenerator(),
    "extraction_strategy": core._create_extraction_strategy(),
    # Both values below come from core._prepare_action(action).
    "virtual_scroll_config": virtual_scroll_config,
    "js_code": js_code,
    "delay_before_return_html": core._get_delay_for_action(action),
    "excluded_tags": ['script', 'style'],
}
config = core.CrawlerRunConfig(**_run_config_kwargs)

In [5]:
# Wrap the crawler call to run in a ProactorEventLoop thread
@run_in_proactor_loop
async def run_crawler():
    async with core.AsyncWebCrawler(config=core.BROWSER_CONFIG) as crawler:
        return await core._run_crawler(url, config, crawler=crawler)

result = await run_crawler()

In [6]:
# Unpack the crawl result into the parsed data plus two failure-related values.
# NOTE(review): error_msg and failed_output are not inspected by any cell
# visible here, while article_data.articles is dereferenced further down —
# consider checking error_msg before continuing.
article_data, error_msg, failed_output = core._parse_extraction_result(result)

In [7]:
# Derive navigation options from the parsed data; the result is passed to
# PageObservation as navigation_options.
navigation_options = core._extract_navigation_options(article_data)

In [8]:
# NOTE(review): import placed mid-notebook; `core` is already imported in the
# setup cell, so this class is presumably also reachable as
# core.PageObservation — consider moving the import to the top cell.
from playground.browser_tools_v12.core import PageObservation
# Bundle the crawled URL, the parsed articles and the navigation options into
# a single observation object.
observation = PageObservation(
    url=result.url,
    articles=article_data.articles,
    navigation_options=navigation_options
)

In [11]:
agent = ScrapingAgent()

# Wrap the _process_single_page call to run in a ProactorEventLoop thread
@run_in_proactor_loop
async def run_process_single_page():
    async with core.AsyncWebCrawler(config=core.BROWSER_CONFIG) as crawler:
        return await agent._process_single_page(url, crawler=crawler, action=None)

page_result = await run_process_single_page()
page_result

{'url': 'https://rollcall.com/factbase/trump/search/',
 'articles': [{'title': 'Press Gaggle: Donald Trump Speaks to Reporters Before Air Force One Departure - January 19, 2025',
   'url': 'https://rollcall.com/factbase/trump/transcript/donald-trump-press-gaggle-before-air-force-one-departure-january-19-2026/',
   'publication_date': '2026-01-19',
   'date_confidence': 'HIGH',
   'date_source': 'near_title'},
  {'title': 'Interview: No Transcript - Peter Nicholas Interviews Donald Trump for NBC News - January 19, 2026',
   'url': 'https://rollcall.com/factbase/trump/transcript/donald-trump-interview-no-transcript-peter-nicholas-nbc-news-january-19-2026/',
   'publication_date': '2026-01-19',
   'date_confidence': 'HIGH',
   'date_source': 'near_title'},
  {'title': 'Remarks: Donald Trump and Jonas Gahr Støre of Norway Exchange Text Messages - January 19, 2026',
   'url': 'https://rollcall.com/factbase/trump/transcript/donald-trump-remarks-text-exchange-norway-greenland-january-18-2026/