In [21]:
# State code for this run; it is interpolated into the target URL and the
# output CSV file name in the next cell.
state = "VA"
import os

In [22]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time

# --- CONFIG ---
# Start URL; the state code is the last path segment.
URL = f"https://filingaccess.serff.com/sfa/home/{state}"
# CSS selector for the results table body (the ":" inside the element id is
# backslash-escaped so it is not parsed as a pseudo-class).
TABLE_BODY = "tbody#j_idt25\\:filingTable_data"
# Timeout, in seconds, passed to every WebDriverWait.
WAIT_TIME = 15
SCRAPE_INTERVAL = 0.5  # seconds to pause after a page change before scraping again
# Output file name; the file itself is written under the "csvs/" directory.
CSV_FILE = f"insurance_filings_auto_pagination_{state}.csv"

# --- Browser setup ---
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")

# ChromeDriverManager().install() returns the path of the chromedriver binary
# that is handed to the Service wrapper.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
driver.get(URL)

# --- Manual filter setup ---
# The search filters are chosen by hand in the opened browser window, so the
# script simply pauses. The duration lives in one constant so the message and
# the actual pause cannot drift apart.
MANUAL_FILTER_SECONDS = 20
print(f"⏳ Please set filters manually within {MANUAL_FILTER_SECONDS} seconds...")
time.sleep(MANUAL_FILTER_SECONDS)

# Shared explicit wait used by extract_table_with_page().
wait = WebDriverWait(driver, WAIT_TIME)

# --- Helper: extract current table page ---
def extract_table_with_page():
    """Scrape every row of the results-table page currently shown.

    Blocks until at least one row is present in the table body, pauses for a
    fixed 10 seconds, reads the paginator label to determine the current page
    number, and then collects the text of every cell in every row.

    Returns:
        list[list]: One list per table row holding the stripped text of each
        ``<td>`` cell, followed by the current page number (``int``), or
        ``None`` when the page number could not be determined.

    Raises:
        selenium.common.exceptions.TimeoutException: If no table row appears
            before the shared ``wait`` times out.
    """
    row_selector = f"{TABLE_BODY} tr"
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, row_selector)))
    # Fixed pause before reading; presumably gives the table time to finish
    # re-rendering after a page change.
    time.sleep(10)

    # The page number is taken from the first space-separated token of the
    # paginator label, with any "(" removed.
    try:
        label = driver.find_element(By.CSS_SELECTOR, "span.ui-paginator-current").text
        current_page = int(label.split(" ")[0].replace("(", ""))
    except Exception:
        # Best effort: a missing or unparsable label must not abort the
        # scrape. `Exception` (not a bare `except`) is caught so that Ctrl-C
        # and SystemExit still propagate to the caller.
        current_page = None

    data = []
    for row in driver.find_elements(By.CSS_SELECTOR, row_selector):
        row_data = [cell.text.strip() for cell in row.find_elements(By.TAG_NAME, "td")]
        # The page number is appended as the last column of every row.
        row_data.append(current_page)
        data.append(row_data)
    return data

# Column names for the output CSV. The last entry corresponds to the page
# number that extract_table_with_page() appends to every row; the others are
# presumably the visible table columns in display order (verify against the
# site).
headers = [
    "Company Name",
    "NAIC Company Code",
    "Insurance Product Name",
    "Sub Type Of Insurance",
    "Filing Type",
    "Filing Status",
    "SERFF Tracking Number",
    "Page Number"
]

# Rows accumulated across all pages scraped so far.
all_data = []
# 1-based count of the page being scraped; used for progress messages and to
# trigger the one-time pause before the first page.
page_counter = 1

print("🚀 Scraping started with auto-pagination...")

# Every checkpoint and the final save go to the same file.
csv_path = f"csvs/{CSV_FILE}"

try:
    # Create the output directory up front; without it every to_csv() call
    # below would fail.
    os.makedirs("csvs", exist_ok=True)

    while True:
        # --- Scrape current page ---
        if page_counter == 1:
            # One-time pause so the entries-per-page setting can be changed
            # by hand before the first page is read.
            print("⏳ Waiting 10s to set entries per page...")
            time.sleep(10)
        try:
            page_data = extract_table_with_page()
            if page_data:
                all_data.extend(page_data)
                # Checkpoint: rewrite the CSV after every page so an
                # interrupted run still leaves the rows scraped so far.
                df = pd.DataFrame(all_data, columns=headers)
                df.to_csv(csv_path, index=False)
                print(f"Page {page_counter} scraped: {len(page_data)} rows. Total: {len(all_data)}")
            else:
                print(f"⚠️ No rows found on page {page_counter}.")
        except Exception as e:
            # Best effort: a failed page is reported and pagination goes on.
            print("⚠️ Error extracting table:", e)

        # --- Locate and click Next button ---
        try:
            next_button = driver.find_element(By.CSS_SELECTOR, "a.ui-paginator-next")

            # The "ui-state-disabled" class on the Next link is treated as
            # the signal that the last page has been reached.
            if "ui-state-disabled" in next_button.get_attribute("class"):
                print("✅ No more pages. Pagination ended.")
                break

            # Remember the paginator label so the page change can be detected.
            current_page_text = driver.find_element(By.CSS_SELECTOR, "span.ui-paginator-current").text

            next_button.click()
            print(f"➡️ Moving to next page ({page_counter + 1})...")

            # Wait until the paginator label differs from the one captured
            # before the click.
            WebDriverWait(driver, WAIT_TIME).until(
                lambda d: d.find_element(By.CSS_SELECTOR, "span.ui-paginator-current").text != current_page_text
            )

            # Give a short delay for stability
            time.sleep(SCRAPE_INTERVAL)
            page_counter += 1

        except Exception as e:
            # Without a working Next button there is no way forward; stop.
            print("⚠️ Pagination click failed:", e)
            break

except KeyboardInterrupt:
    print("🛑 Stopped manually.")

finally:
    # Final save. The nested try/finally guarantees the browser is closed
    # even if building or writing the DataFrame raises.
    try:
        df = pd.DataFrame(all_data, columns=headers)
        df.to_csv(csv_path, index=False)
        print(f"\n✅ Done. Total rows scraped: {len(all_data)}")
    finally:
        driver.quit()


⏳ Please set filters manually within 20 seconds...
🚀 Scraping started with auto-pagination...
⏳ Waiting 10s to set entries per page...
Page 1 scraped: 100 rows. Total: 100
➡️ Moving to next page (2)...
Page 2 scraped: 100 rows. Total: 200
➡️ Moving to next page (3)...
Page 3 scraped: 100 rows. Total: 300
➡️ Moving to next page (4)...
Page 4 scraped: 100 rows. Total: 400
➡️ Moving to next page (5)...
Page 5 scraped: 100 rows. Total: 500
➡️ Moving to next page (6)...
Page 6 scraped: 100 rows. Total: 600
➡️ Moving to next page (7)...
Page 7 scraped: 100 rows. Total: 700
➡️ Moving to next page (8)...
Page 8 scraped: 100 rows. Total: 800
➡️ Moving to next page (9)...
Page 9 scraped: 100 rows. Total: 900
➡️ Moving to next page (10)...
Page 10 scraped: 100 rows. Total: 1000
➡️ Moving to next page (11)...
Page 11 scraped: 100 rows. Total: 1100
➡️ Moving to next page (12)...
Page 12 scraped: 100 rows. Total: 1200
➡️ Moving to next page (13)