# In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
import markdownify
from webdriver_manager.chrome import ChromeDriverManager
from urllib.parse import urlparse

def get_unique_interactive_elements(driver):
    """Collect the interactive elements on the page that likely reveal content.

    Three groups are scanned, in this order:

    1. anchors whose ``href`` contains a ``#`` fragment,
    2. buttons that carry an ``aria-controls`` attribute,
    3. tab / pill widgets (``[role='tab']``, ``.nav-tabs a``, ``.nav-pills a``).

    Elements with no visible text are skipped.  Within a group, elements are
    de-duplicated by the section they point at (fragment, ``aria-controls``
    or ``id``); when two elements share a key, the later one replaces the
    earlier one.  Elements that expose no such identifier are keyed by their
    visible text instead, so they no longer overwrite each other.

    Args:
        driver: Object exposing Selenium's ``find_elements(by, value)``.

    Returns:
        A list of ``(label, element)`` tuples, where ``label`` is the
        element's stripped visible text.
    """
    elements = {}

    def register(prefix, target, label, element):
        # An empty target used to yield the same key ("tab_", "link_") for
        # every anonymous element, so only the last one survived.  Fall back
        # to the label to keep distinct elements distinct.
        key = f"{prefix}_{target}" if target else f"{prefix}_text:{label}"
        elements[key] = (label, element)

    # 1. Anchor tags with hash fragments
    for a in driver.find_elements(By.CSS_SELECTOR, "a[href*='#']"):
        label = a.text.strip()
        if not label:
            continue
        href = a.get_attribute('href')
        if href and '#' in href:
            register("link", href.split('#')[-1], label, a)

    # 2. Buttons that control content
    for btn in driver.find_elements(By.CSS_SELECTOR, "button[aria-controls]"):
        label = btn.text.strip()
        if label:
            register("btn", btn.get_attribute('aria-controls'), label, btn)

    # 3. Tabs and pills (common UI patterns)
    for tab in driver.find_elements(By.CSS_SELECTOR, "[role='tab'], .nav-tabs a, .nav-pills a"):
        label = tab.text.strip()
        if label:
            tab_id = tab.get_attribute('id') or tab.get_attribute('aria-controls') or ''
            register("tab", tab_id, label, tab)

    return list(elements.values())

def capture_section_content(driver, element, section_name):
    """Click an element and capture the text of the content it reveals.

    The element is scrolled into view and clicked; after a fixed pause and a
    short wait, the text of the associated panel is read.  The panel is
    looked up, in order of preference, by:

    1. the fragment of the element's ``href`` (used as an element id),
    2. the element's ``aria-controls`` attribute (used as an element id),
    3. the first active tab panel on the page,
    4. the whole ``<body>`` (also used when any of the lookups above fails).

    Args:
        driver: Selenium WebDriver the element belongs to.
        element: The WebElement to click.
        section_name: Heading text used for the captured section and in the
            warning message.

    Returns:
        ``"\\n\\n## {section_name}\\n\\n{content}"`` on success, or an empty
        string if scrolling, clicking or reading the page raised; in that
        case a warning is printed instead of propagating the error.
    """
    try:
        # Scroll to element so the click lands on it
        driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'instant'});", element)
        time.sleep(0.3)

        # Snapshot the state used to detect that the click changed something
        prev_url = driver.current_url
        prev_active = driver.find_elements(By.CSS_SELECTOR, "[aria-expanded='true'], .active")

        # Click the element
        element.click()
        time.sleep(2)  # Base wait time

        # Dynamic wait: proceed as soon as the URL changed, the number of
        # expanded/active elements changed, or the document reports complete.
        try:
            WebDriverWait(driver, 5).until(
                lambda d: d.current_url != prev_url or
                len(d.find_elements(By.CSS_SELECTOR, "[aria-expanded='true'], .active")) != len(prev_active) or
                d.execute_script("return document.readyState") == "complete"
            )
        except TimeoutException:
            # Best effort: capture whatever is on the page after the timeout.
            pass

        # Capture the most likely content area
        try:
            href = element.get_attribute("href")
            if href and '#' in href:
                target_id = href.split('#')[-1]
                content = driver.find_element(By.ID, target_id).text
            else:
                controls_id = element.get_attribute("aria-controls")
                if controls_id:
                    content = driver.find_element(By.ID, controls_id).text
                else:
                    # Fallback to active tab content or main content
                    active_panel = driver.find_elements(By.CSS_SELECTOR, ".tab-pane.active, [role='tabpanel'].active, .active [role='tabpanel']")
                    content = active_panel[0].text if active_panel else driver.find_element(By.TAG_NAME, 'body').text
        except Exception:
            # Was a bare ``except:``, which also trapped KeyboardInterrupt and
            # SystemExit.  Any ordinary lookup failure still falls back to
            # the full page text.
            content = driver.find_element(By.TAG_NAME, 'body').text

        return f"\n\n## {section_name}\n\n{content}"

    except Exception as e:
        print(f"Warning: Could not capture '{section_name}' - {str(e)}")
        return ""

def scrape_page_content(url):
    """Load ``url`` in headless Chrome and return its text as Markdown.

    The page body is captured first; then every interactive element reported
    by ``get_unique_interactive_elements`` is clicked once per distinct
    (case-insensitive) label and the revealed content is appended as its own
    section.  The joined text is passed through ``markdownify`` and
    whitespace-only lines are dropped (truly empty lines are kept).

    Args:
        url: Address of the page to scrape.

    Returns:
        The resulting Markdown string, or ``None`` if anything raised; the
        error is printed rather than propagated.  The browser is always shut
        down before returning.
    """
    # Browser configuration
    options = Options()
    for flag in (
        "--headless=new",
        "--window-size=1200,900",
        "--disable-blink-features=AutomationControlled",
    ):
        options.add_argument(flag)

    driver = None
    try:
        # Start Chrome with a driver binary resolved by webdriver_manager
        driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=options,
        )

        driver.get(url)

        def page_loaded(d):
            return d.execute_script("return document.readyState") == "complete"

        # Wait for the initial load, then pause a little longer
        WebDriverWait(driver, 15).until(page_loaded)
        time.sleep(2)

        # The page as initially rendered
        body_text = driver.find_element(By.TAG_NAME, 'body').text
        parts = [f"# Page Content: {url}\n\n{body_text}"]

        candidates = get_unique_interactive_elements(driver)
        print(f"Found {len(candidates)} interactive sections")

        # Labels already captured; a label is only recorded once its capture
        # succeeded, so a failed capture does not block a same-named element.
        seen = set()
        for label, element in candidates:
            key = label.lower()
            if key in seen:
                continue
            print(f"Processing: {label}")
            captured = capture_section_content(driver, element, label)
            if not captured:
                continue
            parts.append(captured)
            seen.add(key)

        # NOTE(review): the joined text is plain text, not HTML; markdownify
        # may escape or reflow it depending on its version — confirm this
        # conversion is wanted.
        markdown_content = markdownify.markdownify("\n\n".join(parts), heading_style="ATX")

        # Keep empty lines and lines with visible content; drop lines made
        # up solely of whitespace.
        kept = [
            line
            for line in markdown_content.split("\n")
            if line == "" or line.strip()
        ]
        return "\n".join(kept)

    except Exception as e:
        print(f"Scraping failed: {str(e)}")
        return None
    finally:
        if driver:
            driver.quit()

# Example usage: scrape one page and store the Markdown next to the script
if __name__ == "__main__":
    url = "https://www.fwd.co.th/en/health-insurance/easy-e-health/"
    result = scrape_page_content(url)

    # scrape_page_content returns None on failure; an empty result is
    # treated the same way.
    if not result:
        print("Scraping failed")
    else:
        output_file = "test_v3.md"
        with open(output_file, "w", encoding="utf-8") as md_file:
            md_file.write(result)
        print(f"Success! Content saved to {output_file}")