In [28]:
from playwright.async_api import async_playwright

async def extract_arizona_energy_links(search_term="energy", headless=False, timeout_ms=10000):
    """Search the Arizona Revised Statutes site and collect the result links.

    Opens ``https://www.azleg.gov/arstitle`` in Chromium, submits
    ``search_term`` through the "ARS Search" box, waits for
    ``table#searchResultsTable`` and reads one ``(label, url)`` pair from every
    result row that contains at least one link whose text is "HTML".

    Args:
        search_term: Text typed into the search box. Defaults to ``"energy"``,
            the value that was previously hard-coded.
        headless: Forwarded to ``chromium.launch``. Defaults to ``False`` so a
            visible browser window is opened, as before.
        timeout_ms: Milliseconds to wait for the results table to appear.
            Defaults to ``10000``.

    Returns:
        A list of ``(statute_label, url)`` tuples in the order the rows appear
        on the page. ``statute_label`` is the stripped text of the row's fourth
        cell, or ``"N/A"`` when that cell is missing. Rows without an "HTML"
        link, or whose link has no ``href``, are skipped.

    Raises:
        Any error raised by Playwright (for example when the results table
        does not appear within ``timeout_ms``) is propagated after the browser
        has been closed.
    """
    # Function-scope import so the cell does not depend on another cell's imports.
    from urllib.parse import urljoin

    search_box = "input[placeholder='ARS Search']"
    results = []

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=headless)
        try:
            page = await browser.new_page()

            # Step 1: Go to the ARS Title page and submit the search.
            await page.goto("https://www.azleg.gov/arstitle", wait_until="domcontentloaded")
            await page.fill(search_box, search_term)
            await page.press(search_box, "Enter")

            # Step 2: Wait for the result table to load.
            await page.wait_for_selector("table#searchResultsTable", timeout=timeout_ms)

            # Step 3: Walk every row of the result table.
            rows = await page.query_selector_all("table#searchResultsTable tbody tr")
            for row in rows:
                # The statute label is read from the row's fourth cell.
                statute_text_el = await row.query_selector("td:nth-child(4)")
                statute_text = await statute_text_el.inner_text() if statute_text_el else "N/A"

                # A row may hold several "HTML" links; the last one is used.
                html_links = await row.query_selector_all("a:text('HTML')")
                if not html_links:
                    continue

                href = await html_links[-1].get_attribute("href")
                if not href:
                    # get_attribute returns None for an anchor without href;
                    # skip it instead of crashing on None.startswith.
                    continue

                # Resolve against the current page URL so root-relative,
                # protocol-relative and relative hrefs all become absolute;
                # already-absolute hrefs pass through unchanged.
                full_url = urljoin(page.url, href)
                results.append((statute_text.strip(), full_url))
        finally:
            # Close the browser even if navigation or scraping failed.
            await browser.close()

    return results

# To run in Jupyter:
section_links = await extract_arizona_energy_links()
print(section_links[:5])
print(f"Total links: {len(section_links)}")


[('15-213.01 - Procurement practices; guaranteed energy cost savings contracts; definitions', 'http://www.azleg.gov/search/oop/qfullhit.asp?CiWebHitsFile=/ars/15/00213-01.htm&CiRestriction=energy'), ('42-5159 - Exemptions', 'http://www.azleg.gov/search/oop/qfullhit.asp?CiWebHitsFile=/ars/42/05159.htm&CiRestriction=energy'), ('34-105 - Guaranteed energy cost savings contracts; definitions', 'http://www.azleg.gov/search/oop/qfullhit.asp?CiWebHitsFile=/ars/34/00105.htm&CiRestriction=energy'), ('15-910.02 - Energy and water savings accounts', 'http://www.azleg.gov/search/oop/qfullhit.asp?CiWebHitsFile=/ars/15/00910-02.htm&CiRestriction=energy'), ('42-5061 - Retail classification; definitions', 'http://www.azleg.gov/search/oop/qfullhit.asp?CiWebHitsFile=/ars/42/05061.htm&CiRestriction=energy')]
Total links: 222


In [43]:
# Echo the collected (statute label, URL) tuples as this cell's output.
section_links

[('15-213.01 - Procurement practices; guaranteed energy cost savings contracts; definitions',
  'http://www.azleg.gov/search/oop/qfullhit.asp?CiWebHitsFile=/ars/15/00213-01.htm&CiRestriction=energy'),
 ('42-5159 - Exemptions',
  'http://www.azleg.gov/search/oop/qfullhit.asp?CiWebHitsFile=/ars/42/05159.htm&CiRestriction=energy'),
 ('34-105 - Guaranteed energy cost savings contracts; definitions',
  'http://www.azleg.gov/search/oop/qfullhit.asp?CiWebHitsFile=/ars/34/00105.htm&CiRestriction=energy'),
 ('15-910.02 - Energy and water savings accounts',
  'http://www.azleg.gov/search/oop/qfullhit.asp?CiWebHitsFile=/ars/15/00910-02.htm&CiRestriction=energy'),
 ('42-5061 - Retail classification; definitions',
  'http://www.azleg.gov/search/oop/qfullhit.asp?CiWebHitsFile=/ars/42/05061.htm&CiRestriction=energy'),
 ('15-213.03 - Procurement practices; guaranteed energy production contracts; definitions',
  'http://www.azleg.gov/search/oop/qfullhit.asp?CiWebHitsFile=/ars/15/00213-03.htm&CiRestrict

In [53]:
import pandas as pd

# for a flat list:
# De-duplicate the (statute, url) tuples while keeping the order in which they
# were scraped. dict.fromkeys preserves first-seen order, whereas list(set(...))
# yields an arbitrary order that changes between kernel sessions, which made
# the CSV row order non-reproducible.
H = list(dict.fromkeys(section_links))
df = pd.DataFrame(H, columns=["Statute", "Document Link"])
# index=False keeps the pandas row index out of the file.
df.to_csv("arizona_energy_data_unfiltered.csv", index=False)

# # or for list-of-lists with headers:
# df = pd.DataFrame(rows[1:], columns=rows[0])
# df.to_csv("output.csv", index=False)
