In [22]:
from urllib.parse import urljoin

from playwright.async_api import async_playwright

async def extract_florida_statute_links(query="energy", headless=False):
    """Search the Florida Statutes site and collect the links in the results table.

    Opens the statutes landing page in Firefox, submits ``query`` through the
    statute search box, waits for the results table, and reads every anchor
    inside it.

    Args:
        query: Text typed into the statute search field. Defaults to
            ``"energy"``, the term this notebook was written for.
        headless: Passed to the Firefox launcher. Defaults to ``False``
            (a visible browser window), matching the original behavior.

    Returns:
        A list of ``(link_text, absolute_url)`` tuples, one per anchor that has
        an ``href``, in page order. Duplicates are not removed here.

    Raises:
        Playwright's timeout error if no result link appears within 10 seconds
        of submitting the search.
    """
    results = []
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=headless)  # Firefox works for this site
        try:
            page = await browser.new_page()

            # Navigate to Florida Statutes site
            await page.goto("https://www.flsenate.gov/laws/statutes", wait_until="domcontentloaded")

            # Fill in the search field and submit the form
            await page.fill("input#filteredData_StatuteSearchQuery", query)
            await page.click("input[name='StatutesGoSubmit']")

            # Wait for the results table to load
            await page.wait_for_selector("table.tbl.width100 a", timeout=10000)

            # Extract every anchor inside the results table (the selector is not
            # restricted to a particular column).
            link_elements = await page.query_selector_all("table.tbl.width100 a")
            for link in link_elements:
                href = await link.get_attribute("href")
                if not href:
                    continue
                text = await link.inner_text()
                # urljoin resolves root-relative hrefs against the site origin
                # (same result as prefixing the host) and also copes with
                # absolute or page-relative hrefs, which plain string
                # concatenation would turn into broken URLs.
                full_url = urljoin(page.url, href)
                results.append((text.strip(), full_url))
        finally:
            # Close the browser even when navigation or the selector wait fails.
            await browser.close()
    return results

# Jupyter/IPython accepts a top-level `await` in a cell, so the coroutine is awaited directly.
# (Outside a notebook this call would have to be driven by an event loop, e.g. asyncio.run.)
section_links = await extract_florida_statute_links()
print(f"Extracted {len(section_links)} links.")

Extracted 306 links.


In [24]:
# Display the scraped (link text, absolute URL) tuples.
section_links

[('17.57', 'https://www.flsenate.gov/Laws/Statutes/2024/17.57'),
 ('20.255', 'https://www.flsenate.gov/Laws/Statutes/2024/20.255'),
 ('20.60', 'https://www.flsenate.gov/Laws/Statutes/2024/20.60'),
 ('73.0715', 'https://www.flsenate.gov/Laws/Statutes/2024/73.0715'),
 ('119.0713', 'https://www.flsenate.gov/Laws/Statutes/2024/119.0713'),
 ('159.27', 'https://www.flsenate.gov/Laws/Statutes/2024/159.27'),
 ('161.73', 'https://www.flsenate.gov/Laws/Statutes/2024/161.73'),
 ('161.74', 'https://www.flsenate.gov/Laws/Statutes/2024/161.74'),
 ('163.01', 'https://www.flsenate.gov/Laws/Statutes/2024/163.01'),
 ('163.04', 'https://www.flsenate.gov/Laws/Statutes/2024/163.04'),
 ('163.08', 'https://www.flsenate.gov/Laws/Statutes/2024/163.08'),
 ('163.3163', 'https://www.flsenate.gov/Laws/Statutes/2024/163.3163'),
 ('163.3177', 'https://www.flsenate.gov/Laws/Statutes/2024/163.3177'),
 ('163.3205', 'https://www.flsenate.gov/Laws/Statutes/2024/163.3205'),
 ('163.32051', 'https://www.flsenate.gov/Laws/St

In [32]:
import pandas as pd

# for a flat list of (statute, link) tuples:
# De-duplicate with dict.fromkeys instead of set() so rows keep their scrape order.
# set() iteration order over strings is arbitrary and can change between kernel
# sessions, which made the CSV row order unreproducible.
H = list(dict.fromkeys(section_links))
df = pd.DataFrame(H, columns=["Statute","Document Link"])
df.to_csv("florida_energy_data_unfiltered.csv", index=False)

# # or for list-of-lists with headers:
# df = pd.DataFrame(rows[1:], columns=rows[0])
# df.to_csv("output.csv", index=False)
