# Quiver Quant Election Contributions Scraper

This notebook scrapes all political contribution data from QuiverQuant.

**What it does:**
- Gets the main table with all companies
- Opens each company's detail page (by URL)
- Extracts all contribution data
- Saves everything to an Excel file

**Requirements:**
```bash
pip install selenium openpyxl pandas webdriver-manager
```

## Setup & Imports

In [7]:
pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [openpyxl]1/2[0m [openpyxl]
[1A[2KSuccessfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
Note: you may need to restart the kernel to use updated packages.


In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import TimeoutException, NoSuchElementException, StaleElementReferenceException
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
from datetime import datetime

## Configuration

Adjust these settings as needed

In [9]:
# --- Where to scrape from / where to write to ---
OUTPUT_FILE = "quiver_contributions.xlsx"   # workbook produced by the save step
BASE_URL = "https://www.quiverquant.com/election-contributions/"   # main listing page

# --- Timing (seconds) ---
DELAY_BETWEEN = 1.5   # pause after each company page, to go easy on the server
PAGE_TIMEOUT = 30     # upper bound for the explicit waits on page elements

# --- Browser ---
HEADLESS = True   # False opens a visible Chrome window

# --- Optional cap on how many companies are collected ---
MAX_COMPANIES = None   # None = no limit; use e.g. 10 for a quick test run

## Browser Setup

Configure Chrome with anti-detection features

In [10]:
def setup_driver():
    """
    Build a Chrome WebDriver configured to look less like an automated browser.

    Reads the module-level HEADLESS flag to decide whether Chrome runs without
    a visible window. The chromedriver binary is resolved through
    ChromeDriverManager().install().

    Returns:
        The webdriver.Chrome instance.

    Raises:
        Whatever Selenium / webdriver-manager raise if the driver cannot be
        created or configured. If configuration fails after the browser has
        started, the browser is quit before the exception propagates.
    """
    options = webdriver.ChromeOptions()

    # Run headless (no visible window)
    if HEADLESS:
        options.add_argument('--headless=new')

    # Basic options for stability
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument('--window-size=1920,1080')

    # Mimic a real browser
    options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')

    # Hide automation flags
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)

    # Initialize driver (auto-downloads correct chromedriver)
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)

    try:
        # A script run through execute_script only patches the document that
        # is loaded at that moment, so the override disappeared on the first
        # driver.get(). Registering it via CDP evaluates it in every new
        # document before the page's own scripts run.
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
        )
    except Exception:
        # Don't leave an orphaned Chrome process behind if setup fails.
        driver.quit()
        raise

    return driver

# Initialize the driver
# Creates the single module-level `driver` that the scraping cells reuse;
# it stays open until the cleanup cell calls driver.quit().
print("Setting up browser...")
driver = setup_driver()
print("✓ Browser ready")

Setting up browser...
✓ Browser ready


## Step 1: Extract Main Table

Get the list of all companies from the main page

In [11]:
def extract_main_table(driver):
    """
    Scrape the company listing on the main page.

    Loads BASE_URL, waits up to PAGE_TIMEOUT seconds for the first <table>
    element, and reads every row after the first one (the first row is
    treated as the header). Stops early once MAX_COMPANIES rows have been
    collected, when that limit is set.

    NOTE(review): only rows present in the DOM when the table is read are
    captured. If the site paginates or lazy-loads this table, later pages are
    not visited -- confirm against the live page.

    Args:
        driver: Selenium WebDriver used to load and query the page.

    Returns:
        list of dicts, one per company row, with keys:
          - 'company_name': text of the link in the first cell, with runs of
            whitespace (including line breaks) collapsed to single spaces
          - 'company_url': the link's href
          - 'main_col_1', 'main_col_2', ...: stripped text of the other cells
        An empty list is returned on timeout or any other error raised while
        locating or reading the table.
    """
    print("\nGetting main company list...")
    print(f"URL: {BASE_URL}")

    # Load the page
    driver.get(BASE_URL)
    wait = WebDriverWait(driver, PAGE_TIMEOUT)
    time.sleep(3)  # Let the page fully load

    try:
        # Find the main table
        table = wait.until(EC.presence_of_element_located((By.TAG_NAME, "table")))
        time.sleep(2)

        # Get all rows
        rows = table.find_elements(By.TAG_NAME, "tr")
        print(f"Found {len(rows)} rows in main table")

        companies_data = []
        stale_rows = 0

        # Skip header row (start at index 1)
        for idx, row in enumerate(rows[1:], 1):
            try:
                cols = row.find_elements(By.TAG_NAME, "td")
                if not cols:
                    continue

                # First column has the company link
                link = cols[0].find_element(By.TAG_NAME, "a")

                # The link text can span several lines (ticker on one line,
                # company name on the next); collapse it to a single line so
                # log output and spreadsheet cells stay readable.
                company_name = " ".join(link.text.split())
                company_url = link.get_attribute('href')

                if not company_name or not company_url:
                    continue

                row_data = {
                    'company_name': company_name,
                    'company_url': company_url
                }

                # Add other columns from the main table
                for i, col in enumerate(cols[1:], 1):
                    row_data[f'main_col_{i}'] = col.text.strip()

                companies_data.append(row_data)
                print(f"  [{len(companies_data)}] {company_name}")

                # Stop if we hit the limit
                if MAX_COMPANIES and len(companies_data) >= MAX_COMPANIES:
                    print(f"\n⚠ Reached limit of {MAX_COMPANIES} companies")
                    break

            except StaleElementReferenceException:
                # Row was re-rendered while being read; count it so the loss
                # is visible in the summary instead of silent.
                stale_rows += 1
                continue
            except NoSuchElementException:
                # A data row without a link is not a company entry.
                print(f"  Skipping row {idx}: no company link in first cell")
                continue
            except Exception as e:
                print(f"  Error on row {idx}: {str(e)}")
                continue

        if stale_rows:
            print(f"  ⚠ Skipped {stale_rows} rows that went stale while reading")

        print(f"\n✓ Extracted {len(companies_data)} companies")
        return companies_data

    except TimeoutException:
        print("✗ Timeout loading main table")
        return []
    except Exception as e:
        print(f"✗ Error: {str(e)}")
        return []

# Run it
# `companies` is the list of per-company dicts (including 'company_name' and
# 'company_url') that the per-company scraping loop iterates over.
companies = extract_main_table(driver)


Getting main company list...
URL: https://www.quiverquant.com/election-contributions/
Found 102 rows in main table
  [1] HON
Honeywell International Inc
  [2] NOC
Northrop Grumman Corp.
  [3] HD
Home Depot, Inc.
  [4] BA
Boeing Co.
  [5] CHTR
Charter Communications Inc. - Ordinary Shares - Class A
  [6] ELV
Elevance Health Inc
  [7] UPS
United Parcel Service, Inc. - Ordinary Shares - Class B
  [8] GD
General Dynamics Corp.
  [9] CMCSV
Comcast Corporation Class A Common Stock Ex-Distribution When Issued
  [10] LMT
Lockheed Martin Corp.
  [11] AFL
Aflac Inc.
  [12] WMT
Walmart Inc
  [13] NEE
NextEra Energy Inc
  [14] LHX
L3Harris Technologies Inc
  [15] T
AT&T, Inc.
  [16] GM
General Motors Company
  [17] GOOGL
Alphabet Inc - Ordinary Shares - Class A
  [18] VLO
Valero Energy Corp.
  [19] DELL
Dell Technologies Inc - Ordinary Shares - Class C
  [20] TMUS
T-Mobile US Inc
  [21] ETR
Entergy Corp.
  [22] VZ
Verizon Communications Inc
  [23] AMGN
AMGEN Inc.
  [24] HGTY
Hagerty Inc - Ordinar

## Step 2: Extract Contributions for Each Company

Open each company's detail page and collect its political contributions

In [12]:
def extract_company_contributions(driver, company_url, company_name, index, total):
    """
    Scrape the contributions table from a single company's detail page.

    Loads company_url, waits up to PAGE_TIMEOUT seconds for the first <table>,
    uses its first row as the header and turns every following row into a dict.

    NOTE(review): only rows present in the DOM when the table is read are
    captured. If the detail table is paginated, later pages are not visited --
    confirm against the live page.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        company_url: URL of the company's detail page.
        company_name: label stored under the 'Company' key of every record.
        index: 1-based position of this company, used only for progress output.
        total: total number of companies, used only for progress output.

    Returns:
        list of dicts, one per data row. Each dict has a 'Company' key plus
        one key per table column. Column keys come from the header row; an
        empty header becomes 'Column_<position>', and a header that repeats
        (or equals 'Company') gets a numeric suffix such as '_2' so that it
        cannot overwrite another value in the same record. An empty list is
        returned on timeout or any other page-level error.
    """
    print(f"\n[{index}/{total}] Processing: {company_name}")
    print("-" * 70)

    try:
        # Navigate to company page
        driver.get(company_url)
        wait = WebDriverWait(driver, PAGE_TIMEOUT)
        time.sleep(2)

        # Find the contributions table
        table = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "table")))
        time.sleep(1)

        rows = table.find_elements(By.TAG_NAME, "tr")

        if len(rows) == 0:
            print("  ⚠ No rows found")
            return []

        # Header cells are normally <th>; fall back to <td> for tables that
        # mark up their header row with plain cells.
        header_row = rows[0]
        header_cells = header_row.find_elements(By.TAG_NAME, "th")
        if not header_cells:
            header_cells = header_row.find_elements(By.TAG_NAME, "td")

        # Build unique column keys. 'Company' is reserved for the label this
        # function adds itself, so a scraped column of the same name must not
        # replace it; repeated header texts must not overwrite each other.
        used_names = {'Company'}
        headers = []
        for position, cell in enumerate(header_cells, 1):
            base_name = cell.text.strip() or f"Column_{position}"
            unique_name = base_name
            suffix = 2
            while unique_name in used_names:
                unique_name = f"{base_name}_{suffix}"
                suffix += 1
            used_names.add(unique_name)
            headers.append(unique_name)

        # Extract data rows
        contributions = []
        skipped_rows = 0

        for row_number, row in enumerate(rows[1:], 1):
            try:
                cols = row.find_elements(By.TAG_NAME, "td")
                if len(cols) == 0:
                    continue

                row_data = {'Company': company_name}

                for i, col in enumerate(cols):
                    # Cells beyond the header width still get a stable key.
                    header = headers[i] if i < len(headers) else f'Column_{i+1}'
                    row_data[header] = col.text.strip()

                contributions.append(row_data)

            except StaleElementReferenceException:
                # Row was re-rendered while being read.
                skipped_rows += 1
                continue
            except Exception as e:
                # Keep going, but make the loss visible instead of silent.
                skipped_rows += 1
                print(f"  ⚠ Row {row_number} skipped: {str(e)}")
                continue

        if skipped_rows:
            print(f"  ⚠ Skipped {skipped_rows} rows that could not be read")

        print(f"  ✓ Extracted {len(contributions)} contributions")
        return contributions

    except TimeoutException:
        print(f"  ✗ Timeout loading {company_name}")
        return []
    except Exception as e:
        print(f"  ✗ Error: {str(e)}")
        return []

# Visit every company's detail page and pool all records into one flat list
all_contributions = []

if not companies:
    print("\n✗ No companies to process")
else:
    company_count = len(companies)

    print(f"\n{'='*70}")
    print(f"Processing {company_count} companies...")
    print(f"{'='*70}")

    for i, company in enumerate(companies, 1):
        # Scrape this company and append its rows to the master list
        all_contributions.extend(
            extract_company_contributions(
                driver,
                company['company_url'],
                company['company_name'],
                i,
                company_count,
            )
        )

        # Pause between page loads so the server is not hammered
        time.sleep(DELAY_BETWEEN)

    print(f"\n{'='*70}")
    print(f"✓ Total contributions extracted: {len(all_contributions)}")
    print(f"{'='*70}")


Processing 100 companies...

[1/100] Processing: HON
Honeywell International Inc
----------------------------------------------------------------------
  ✓ Extracted 101 contributions

[2/100] Processing: NOC
Northrop Grumman Corp.
----------------------------------------------------------------------
  ✓ Extracted 101 contributions

[3/100] Processing: HD
Home Depot, Inc.
----------------------------------------------------------------------
  ✓ Extracted 101 contributions

[4/100] Processing: BA
Boeing Co.
----------------------------------------------------------------------
  ✓ Extracted 101 contributions

[5/100] Processing: CHTR
Charter Communications Inc. - Ordinary Shares - Class A
----------------------------------------------------------------------
  ✓ Extracted 101 contributions

[6/100] Processing: ELV
Elevance Health Inc
----------------------------------------------------------------------
  ✓ Extracted 101 contributions

[7/100] Processing: UPS
United Parcel Service, I

## Step 3: Save to Excel

Create a formatted Excel workbook with three sheets: Companies, Contributions, and Summary

In [None]:
def _collect_headers(records):
    """Return every key used by any record, in first-seen order."""
    return list(dict.fromkeys(key for record in records for key in record))


def _write_records_sheet(ws, records):
    """Write records (a list of dicts) to ws as a table with a styled header row."""
    if not records:
        return

    # Take the union of keys across all records: rows scraped from different
    # pages can carry different columns, and using only the first record's
    # keys would silently drop the extra ones.
    headers = _collect_headers(records)
    ws.append(headers)

    # Format header row
    header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
    header_font = Font(bold=True, color="FFFFFF", size=11)
    header_alignment = Alignment(horizontal="center", vertical="center")
    for cell in ws[1]:
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = header_alignment

    # Add data rows; a record missing a column gets an empty string there
    for record in records:
        ws.append([record.get(h, '') for h in headers])

    # Size each column to its longest value (plus padding), capped at 60
    for column in ws.columns:
        longest = max(
            (len(str(cell.value)) for cell in column if cell.value),
            default=0,
        )
        ws.column_dimensions[column[0].column_letter].width = min(longest + 3, 60)


def save_to_excel(companies_data, all_contributions, filename):
    """
    Write the scraped data to an Excel workbook with three sheets.

    Sheets:
        Companies      one row per dict in companies_data
        Contributions  one row per dict in all_contributions
        Summary        record counts and the time the file was written

    For the two data sheets the columns are the union of keys across all
    records, in first-seen order. A sheet whose input list is empty is still
    created but left blank.

    Args:
        companies_data: list of dicts describing companies.
        all_contributions: list of dicts describing contribution records.
        filename: path of the .xlsx file to write.

    Returns:
        None. The workbook is saved to filename and a short report is printed.
    """
    print(f"\nCreating Excel file: {filename}")

    wb = Workbook()

    # --- SHEET 1: Company List ---
    ws1 = wb.active
    ws1.title = "Companies"
    _write_records_sheet(ws1, companies_data)

    # --- SHEET 2: All Contributions ---
    ws2 = wb.create_sheet("Contributions")
    _write_records_sheet(ws2, all_contributions)

    # --- SHEET 3: Summary ---
    ws3 = wb.create_sheet("Summary")
    ws3.append(['DATA SUMMARY'])
    ws3.append([''])
    ws3.append(['Metric', 'Value'])
    ws3.append(['Total Companies', len(companies_data)])
    ws3.append(['Total Contributions', len(all_contributions)])
    ws3.append(['Extraction Date', datetime.now().strftime('%Y-%m-%d %H:%M:%S')])

    ws3['A1'].font = Font(bold=True, size=14)
    # Row 3 holds the 'Metric' / 'Value' labels; style it like a table header
    for row in ws3.iter_rows(min_row=3, max_row=3):
        for cell in row:
            cell.fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
            cell.font = Font(bold=True, color="FFFFFF")

    ws3.column_dimensions['A'].width = 30
    ws3.column_dimensions['B'].width = 40

    # Save file
    wb.save(filename)
    print(f"✓ File saved successfully")
    print(f"  - Sheet 1: {len(companies_data)} companies")
    print(f"  - Sheet 2: {len(all_contributions)} contributions")
    print(f"  - Sheet 3: Summary stats")

# Save the data
# The workbook is written only when both lists are non-empty.
# NOTE(review): a run that found companies but no contributions writes nothing,
# even though save_to_excel guards each data sheet separately -- confirm this
# is intended.
if companies and all_contributions:
    save_to_excel(companies, all_contributions, OUTPUT_FILE)
else:
    print("\n⚠ No data to save")

## Cleanup

Close the browser

In [None]:
# Shut down the Selenium session started in the setup cell
driver.quit()
print("\n✓ Browser closed")

# Recap of the run, printed as a single banner
summary_lines = [
    "\n" + "="*70,
    "SCRAPING COMPLETE",
    "="*70,
    f"Companies processed: {len(companies)}",
    f"Contributions extracted: {len(all_contributions)}",
    f"Output file: {OUTPUT_FILE}",
    "="*70,
]
print("\n".join(summary_lines))

## Optional: Quick Data Preview

In [None]:
# Preview companies data
# Does nothing when `companies` is empty. `display` is provided by the
# IPython/Jupyter session, so this cell is meant to run inside a notebook.
if companies:
    print("First 5 companies:")
    df_companies = pd.DataFrame(companies)
    display(df_companies.head())

In [None]:
# Preview contributions data
# Does nothing when `all_contributions` is empty. The frame is built from the
# flat list of record dicts, one row per scraped contribution.
if all_contributions:
    print("First 10 contributions:")
    df_contributions = pd.DataFrame(all_contributions)
    display(df_contributions.head(10))

In [None]:
# Basic stats
if all_contributions:
    df_contributions = pd.DataFrame(all_contributions)
    print("\nContributions per company:")
    print(df_contributions['Company'].value_counts())