In [1]:
from playwright.async_api import async_playwright

async def extract_maine_energy_links(query="energy", headless=False):
    """Search the Maine Legislature statute index and collect the result links.

    Opens the statute search page in Chromium, submits ``query`` through the
    ``querytext`` input, waits for the ``ul.results`` list and reads every
    anchor found inside its list items.

    Args:
        query: Text typed into the search box. Defaults to ``"energy"``.
        headless: Forwarded to ``chromium.launch``. Defaults to ``False``
            (a visible browser window).

    Returns:
        A list of ``(title, url)`` tuples in page order. ``title`` is the
        anchor's inner text with surrounding whitespace stripped. ``url`` is
        the anchor's ``href`` resolved against the URL of the page holding the
        results; it is ``None`` when the anchor has no ``href`` attribute.

    Raises:
        Any exception raised by the Playwright calls propagates to the caller
        (for example when a selector wait exceeds its timeout). The browser is
        closed before the exception leaves this function.
    """
    # Function-scope import so this cell does not depend on another cell's imports.
    from urllib.parse import urljoin

    search_box = "input[name='querytext']"
    results = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=headless)
        try:
            page = await browser.new_page()

            # Step 1: open the search page and wait for the search box.
            await page.goto("https://legislature.maine.gov/statutes/search.htm", wait_until="load")
            await page.wait_for_selector(search_box, timeout=60000)

            # Step 2: type the query and submit it with Enter.
            await page.fill(search_box, query)
            await page.press(search_box, "Enter")

            # Step 3: wait for the result list, then read its anchors.
            await page.wait_for_selector("ul.results", timeout=10000)
            base_url = page.url
            for link in await page.query_selector_all("ul.results li a"):
                href = await link.get_attribute("href")
                text = await link.inner_text()
                if href:
                    # urljoin leaves absolute URLs untouched and resolves
                    # root-relative and document-relative hrefs the way a
                    # browser would, unlike plain string concatenation.
                    href = urljoin(base_url, href)
                results.append((text.strip(), href))
        finally:
            # Close the browser even when a navigation or wait step fails.
            await browser.close()
    return results

# To run the function in Jupyter directly:
section_links = await extract_maine_energy_links()
print(section_links[:5])  # Print first 5 links
print(f"Total links: {len(section_links)}")


[('Title 38, §480-HH:\xa0General permit for offshore wind energy demonstration project', 'https://legislature.maine.gov/legis/statutes/38/title38sec480-HH.html'), ('Title 35-A, §10104:\xa0Duties', 'https://legislature.maine.gov/legis/statutes/35-A/title35-Asec10104.html'), ("Title 2, §9:\xa0Governor's Energy Office", 'https://legislature.maine.gov/legis/statutes/2/title2sec9.html'), ('Title 35-A, §3210-I:\xa0Northern Maine Renewable Energy Development Program', 'https://legislature.maine.gov/legis/statutes/35-A/title35-Asec3210-I.html'), ('Title 35-A, §3210:\xa0Renewable resources', 'https://legislature.maine.gov/legis/statutes/35-A/title35-Asec3210.html')]
Total links: 651


In [3]:
import pandas as pd

# De-duplicate the scraped (title, url) pairs. dict.fromkeys keeps the first
# occurrence of each pair in scrape order, so the CSV rows come out in the same
# order on every run; list(set(...)) ordered them by string hash, which can
# differ between kernel sessions.
# `H` and `df` keep their original names in case other cells refer to them.
H = list(dict.fromkeys(section_links))
df = pd.DataFrame(H, columns=["Statute","Document Link"]).assign(state="Maine")
df.to_csv("maine_energy_data_unfiltered.csv", index=False)