In [4]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time

# Scrape the CDC2253 payments table from doge.gov into `df`.
#
# Flow: open the payments page, click the CDC2253 entry, click "View All"
# (best effort), snapshot the rendered HTML, then parse the first <table>.
# All browser work sits inside try/finally so Chrome is shut down exactly
# once, whether the steps succeed or raise part-way through.
driver = uc.Chrome(headless=False)
try:
    driver.get("https://doge.gov/payments")
    wait = WebDriverWait(driver, 20)
    time.sleep(5)  # Let JS render fully

    # Step 1: Click CDC2253 div.
    # Wait until the element is clickable (visible and enabled), not merely
    # present in the DOM, before sending the click. A failure here is
    # reported and re-raised; the finally clause closes the browser.
    try:
        cdc_div = wait.until(EC.element_to_be_clickable((
            By.XPATH,
            '//div[contains(@class, "cursor-pointer") and contains(., "CDC2253")]'
        )))
        ActionChains(driver).move_to_element(cdc_div).click().perform()
        print("✅ Clicked CDC2253")
        time.sleep(3)
    except Exception as e:
        print("❌ Failed to click CDC2253:", e)
        raise

    # Step 2: Click "View All" (may also be a <div>).
    # Best effort: if the control cannot be clicked, warn and scrape
    # whatever table is currently rendered.
    try:
        view_all_div = wait.until(EC.element_to_be_clickable((
            By.XPATH,
            '//*[contains(@class, "cursor-pointer") and contains(., "View All")]'
        )))
        ActionChains(driver).move_to_element(view_all_div).click().perform()
        print("✅ Clicked View All")
        time.sleep(3)
    except Exception as e:
        print("⚠️ View All not clickable:", e)

    # Step 3: Snapshot the rendered page; parsing below needs no live browser.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    driver.quit()

tables = soup.find_all("table")
if not tables:
    print("❌ No tables found.")
    # Bind `df` anyway so later cells never pick up a stale frame left in the
    # kernel by a previous run (or fail with NameError on a fresh kernel).
    df = pd.DataFrame()
else:
    table = tables[0]
    headers = [th.get_text(strip=True) for th in table.find_all('th')]
    rows = []
    # The first <tr> is treated as the header row and skipped; rows without
    # any <td> cells are dropped.
    for tr in table.find_all('tr')[1:]:
        row = [td.get_text(strip=True) for td in tr.find_all('td')]
        if row:
            rows.append(row)

    # `headers or None`: a table without <th> cells falls back to default
    # integer column labels instead of raising on a column-count mismatch.
    df = pd.DataFrame(rows, columns=headers or None)
    print(f"✅ Scraped CDC2253 table with {len(df)} rows")
    print(df.head())

✅ Clicked CDC2253
✅ Clicked View All
✅ Scraped CDC2253 table with 500 rows
                                         AGENCY  \
0  HHS-CENTERS FOR DISEASE CONTROL & PREVENTION   
1  HHS-CENTERS FOR DISEASE CONTROL & PREVENTION   
2  HHS-CENTERS FOR DISEASE CONTROL & PREVENTION   
3  HHS-CENTERS FOR DISEASE CONTROL & PREVENTION   
4  HHS-CENTERS FOR DISEASE CONTROL & PREVENTION   

                          RECIPIENT                  AWARD DESCRIPTION  \
0  California Dept of Public Health  State - Other Health Organization   
1  California Dept of Public Health  State - Other Health Organization   
2  California Dept of Public Health  State - Other Health Organization   
3  California Dept of Public Health  State - Other Health Organization   
4  California Dept of Public Health  State - Other Health Organization   

  PAYMENT DATE   PAYMENT  
0     3/6/2025  $489,314  
1     3/6/2025    $9,418  
2     3/6/2025   $47,632  
3     3/6/2025    $4,514  
4     3/6/2025    $1,490  


In [5]:
# Write the scraped table held in `df` to a CSV file in the working
# directory, omitting the DataFrame index column, then confirm on stdout.
df.to_csv(path_or_buf="cdc2253_payments.csv", index=False)
print("📁 Saved table to cdc2253_payments.csv")

📁 Saved table to cdc2253_payments.csv


In [6]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time

# Scrape every page of the CDC2253 payments table from doge.gov and save
# the combined rows to cdc2253_payments_full.csv.
#
# Flow: open the payments page, click the CDC2253 entry, click "View All"
# (best effort), then repeatedly parse the first <table> and click "Next"
# until no enabled "Next" button remains. All browser work sits inside
# try/finally so Chrome is shut down exactly once, even if a step raises.
all_rows = []
headers = None

driver = uc.Chrome(headless=False)
try:
    driver.get("https://doge.gov/payments")
    wait = WebDriverWait(driver, 20)
    time.sleep(5)  # Let JS render fully

    # Step 1: Click CDC2253.
    # Wait until the element is clickable (visible and enabled), not merely
    # present in the DOM. A failure here is reported and re-raised; the
    # finally clause closes the browser.
    try:
        cdc_div = wait.until(EC.element_to_be_clickable((
            By.XPATH,
            '//div[contains(@class, "cursor-pointer") and contains(., "CDC2253")]'
        )))
        ActionChains(driver).move_to_element(cdc_div).click().perform()
        print("✅ Clicked CDC2253")
        time.sleep(3)
    except Exception as e:
        print("❌ CDC2253 click failed:", e)
        raise

    # Step 2: Click "View All".
    # Best effort: if the control cannot be clicked, warn and continue with
    # whatever table is currently rendered.
    try:
        view_all = wait.until(EC.element_to_be_clickable((
            By.XPATH,
            '//*[contains(@class, "cursor-pointer") and contains(., "View All")]'
        )))
        ActionChains(driver).move_to_element(view_all).click().perform()
        print("✅ Clicked View All")
        time.sleep(3)
    except Exception as e:
        print("⚠️ View All not found or not clickable:", e)

    # Step 3: Loop through pagination and collect table data
    while True:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        tables = soup.find_all("table")

        if not tables:
            print("❌ No tables found.")
            break

        table = tables[0]
        if not headers:
            # Captured from the first page that exposes <th> cells.
            headers = [th.get_text(strip=True) for th in table.find_all('th')]

        # The first <tr> is treated as the header row and skipped; rows
        # without any <td> cells are dropped.
        for tr in table.find_all('tr')[1:]:
            cells = [td.get_text(strip=True) for td in tr.find_all('td')]
            if cells:
                all_rows.append(cells)

        # Step 4: Try to click "Next".
        # find_elements returns an empty list when nothing matches, so the
        # normal end of pagination is detected without catching exceptions.
        next_buttons = driver.find_elements(
            By.XPATH, '//button[contains(., "Next") and not(@disabled)]'
        )
        if not next_buttons:
            print("✅ No more pages. Scraping complete.")
            break

        next_button = next_buttons[0]
        try:
            ActionChains(driver).move_to_element(next_button).click().perform()
        except Exception as e:
            # A failed click is not the same as reaching the last page:
            # stop, but say that the collected data may be truncated.
            print("⚠️ Could not click Next; results may be incomplete:", e)
            break
        print("➡️ Clicked Next")
        time.sleep(3)  # Give the next page time to render before re-parsing
finally:
    driver.quit()

# Step 5: Convert to DataFrame and save.
# `headers or None`: if no <th> cells were ever found, fall back to default
# integer column labels instead of raising on a column-count mismatch.
df = pd.DataFrame(all_rows, columns=headers or None)
df.to_csv("cdc2253_payments_full.csv", index=False)
print(f"📁 Saved {len(df)} rows to cdc2253_payments_full.csv")

✅ Clicked CDC2253
✅ Clicked View All
➡️ Clicked Next
➡️ Clicked Next
➡️ Clicked Next
➡️ Clicked Next
✅ No more pages. Scraping complete.
📁 Saved 2253 rows to cdc2253_payments_full.csv
