In [7]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os
from datetime import datetime

In [8]:
def save_html_page(driver, filename, save_dir="saudelegis_pages"):
    """Write the driver's current page source to ``save_dir/filename``.

    Args:
        driver: Any object exposing a ``page_source`` attribute holding the
            HTML text (e.g. a Selenium WebDriver).
        filename: Name of the file to create inside ``save_dir``.
        save_dir: Directory that receives the file. It is created, including
            missing parents, when it does not exist yet. Defaults to
            ``"saudelegis_pages"``.

    Returns:
        The path of the written file, or ``None`` if anything went wrong
        (the error is printed rather than raised, so a failed save does not
        abort the surrounding scraping run).
    """
    try:
        # exist_ok=True avoids the check-then-create race of testing
        # os.path.exists() first and calling makedirs() afterwards.
        os.makedirs(save_dir, exist_ok=True)

        html = driver.page_source
        file_path = os.path.join(save_dir, filename)

        # Explicit UTF-8 so accented characters are written the same way
        # regardless of the platform's default encoding.
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(html)

        print(f"HTML saved to: {file_path}")
        return file_path

    except Exception as e:
        # Deliberately best-effort: report the problem and signal it via None.
        print(f"Error saving HTML: {str(e)}")
        return None

In [9]:
def setup_driver():
    """Create and return a Chrome WebDriver with this notebook's options.

    Returns:
        A started ``webdriver.Chrome`` instance with a 10-second implicit
        wait already applied.
    """
    options = Options()

    # Command-line switches handed to Chrome, applied in this order.
    # NOTE(review): the blink-features switch and the two experimental
    # options below look intended to hide automation markers - confirm.
    for switch in (
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-blink-features=AutomationControlled",
    ):
        options.add_argument(switch)

    for option_name, option_value in (
        ("excludeSwitches", ["enable-automation"]),
        ("useAutomationExtension", False),
    ):
        options.add_experimental_option(option_name, option_value)

    # No Service object is passed, so ChromeDriver must be discoverable
    # (e.g. installed and on PATH). To pin a specific executable, build
    # Service("/path/to/chromedriver") and pass it as service=... instead.
    browser = webdriver.Chrome(options=options)

    # Implicit wait: element lookups retry for up to 10 seconds.
    # NOTE(review): other functions in this file also use explicit
    # WebDriverWait; Selenium's documentation cautions against mixing both.
    browser.implicitly_wait(10)
    return browser

In [11]:
def buttons(driver, timestamp, max_clicks=3,
            button_xpath='//*[@id="form:j_idt161"]', pause_seconds=5):
    """Click a button repeatedly and save the page HTML after each click.

    For every iteration ``i`` from 1 to ``max_clicks`` the function waits up
    to 15 seconds for the element at ``button_xpath`` to become clickable,
    clicks it, sleeps ``pause_seconds`` and stores the page as
    ``search_results_page_{i}_{timestamp}.html`` via ``save_html_page``.

    Args:
        driver: Selenium WebDriver positioned on the results page.
        timestamp: String embedded in the saved file names.
        max_clicks: Number of click/save iterations to attempt (default 3).
        button_xpath: XPath of the button to click. The default targets an
            id that looks auto-generated, so it may need overriding if the
            page markup changes.
        pause_seconds: Fixed delay after each click before the HTML is
            captured (default 5).

    Returns:
        None. Errors in one iteration are printed and the loop moves on to
        the next iteration.
    """
    for i in range(1, max_clicks + 1):
        try:
            print(f"\n--- Iteration {i} ---")

            # Wait for the button to be clickable
            print(f"Looking for button with xpath '{button_xpath}'...")
            button = WebDriverWait(driver, 15).until(
                EC.element_to_be_clickable((By.XPATH, button_xpath))
            )

            print(f"Clicking button (iteration {i})...")
            button.click()

            # Fixed pause: the code has no explicit signal for when the new
            # content has finished loading after the click.
            print("Waiting for page to load after button click...")
            time.sleep(pause_seconds)

            print(f"Saving HTML for page {i}...")
            saved_path = save_html_page(
                driver, f"search_results_page_{i}_{timestamp}.html"
            )

            # save_html_page reports failure by returning None instead of
            # raising, so success must not be announced unconditionally.
            if saved_path is None:
                print(f"Iteration {i} finished, but the HTML could not be saved.")
            else:
                print(f"Iteration {i} completed successfully!")

        except Exception as e:
            # Best-effort: report and carry on with the next iteration.
            print(f"Error in iteration {i}: {str(e)}")

In [12]:
def access_saudelegis(search_term="doença rara", paginate=False):
    """Search the SaudeLegis public listing and save the results page.

    Opens the listing URL in Chrome, types ``search_term`` into the
    ``form:assunto`` field, clicks the search button, waits, and stores the
    resulting HTML as ``search_results_initial_{timestamp}.html`` through
    ``save_html_page``. The browser is always closed at the end.

    Args:
        search_term: Text typed into the subject field. Defaults to
            ``"doença rara"``, the value this function has always sent.
        paginate: When True, additionally call ``buttons(driver, timestamp)``
            after the initial save. Off by default, matching the previous
            code where that call was commented out.

    Returns:
        None. Any exception is printed, not raised.
    """
    driver = None

    try:
        print("Setting up Chrome WebDriver...")
        driver = setup_driver()

        url = "https://saudelegis.saude.gov.br/saudelegis/secure/norma/listPublic.xhtml"
        print(f"Accessing: {url}")
        driver.get(url)

        print("Waiting for page to load...")
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )

        print(f"Page title: {driver.title}")
        print(f"Current URL: {driver.current_url}")
        print("Page loaded successfully!")

        print("Looking for the 'assunto' form field...")
        assunto_field = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="form:assunto"]'))
        )

        # The log line is built from the same variable that is sent, so the
        # message can no longer disagree with the text actually typed.
        print(f"Entering '{search_term}' in the form field...")
        assunto_field.clear()
        assunto_field.send_keys(search_term)
        print("Text entered successfully!")

        # Short pause so the typed text is visible in the browser window.
        time.sleep(2)

        # NOTE(review): absolute XPath tied to the current page layout; it
        # will stop matching if the surrounding markup changes.
        print("Looking for the search button...")
        search_button = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.XPATH, '/html/body/div[2]/div/div/div[2]/div/div/form/fieldset/div[7]/div/div/input[1]'))
        )

        print("Clicking the search button...")
        search_button.click()
        print("Button clicked successfully!")

        # Fixed pause while the results render.
        print("Waiting for search results...")
        time.sleep(5)

        print(f"Current URL after search: {driver.current_url}")

        print("Saving initial search results...")
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        save_html_page(driver, f"search_results_initial_{timestamp}.html")

        if paginate:
            buttons(driver, timestamp)

        # Keep the browser open briefly so the final page can be inspected.
        print("\nAll iterations completed! Keeping browser open for 10 seconds to view final results...")
        time.sleep(10)

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    finally:
        # Always release the browser, even after an error.
        if driver:
            print("Closing browser...")
            driver.quit()

# Entry point: run the scraper when this cell (or the file as a script) is
# executed directly; the guard skips it when the code is imported as a module.
if __name__ == "__main__":
    access_saudelegis()

Setting up Chrome WebDriver...
Accessing: https://saudelegis.saude.gov.br/saudelegis/secure/norma/listPublic.xhtml
Waiting for page to load...
Page title: SAUDELEGIS - Sistema de Legislação da Saúde
Current URL: https://saudelegis.saude.gov.br/saudelegis/secure/norma/listPublic.xhtml
Page loaded successfully!
Looking for the 'assunto' form field...
Entering 'doenças raras' in the form field...
Text entered successfully!
Looking for the search button...
Clicking the search button...
Button clicked successfully!
Waiting for search results...
Current URL after search: https://saudelegis.saude.gov.br/saudelegis/secure/norma/listPublic.xhtml;jsessionid=HVbTcBEaVaCkD9u1Elb1nHbv
Saving initial search results...
HTML saved to: saudelegis_pages/search_results_initial_20250724_230308.html

All iterations completed! Keeping browser open for 10 seconds to view final results...
Closing browser...
