In [None]:
from fund_lens_etl.clients.fec import FECAPIClient
from fund_lens_etl.config import USState

# Slice of FEC data explored by the cells below: one state, one election cycle.
state, cycle = "MD", 2026

# Shared API client used for every request in this notebook.
client = FECAPIClient()

In [None]:
# Get total count for committees.
# per_page=1 keeps the payload minimal: only the pagination metadata is read.
response = client.get("/committees/", params={
    "state": state,
    "cycle": cycle,
    "per_page": 1,  # Just need the count, not the data
})

pagination = response.get("pagination", {})
committee_count = pagination.get("count", 0)

# The "pages" value in the response is presumably computed for the per_page
# that was requested (1 here), so it would just mirror the count. Derive the
# page total for the 100-per-page size the label advertises instead
# (integer ceiling division).
committee_pages = (committee_count + 99) // 100

# Use the configured state in the label so it stays correct if `state` changes.
print(f"Total Committees ({state}, {cycle}): {committee_count:,}")
print(f"Pages (100 per page): {committee_pages:,}")

In [None]:
# Get total count for candidates.
# per_page=1 keeps the payload minimal: only the pagination metadata is read.
response = client.get("/candidates/", params={
    "state": state,
    "cycle": cycle,
    "per_page": 1,
})

pagination = response.get("pagination", {})
candidate_count = pagination.get("count", 0)

# The "pages" value in the response is presumably computed for the per_page
# that was requested (1 here), so it would just mirror the count. Derive the
# page total for the 100-per-page size the label advertises instead
# (integer ceiling division).
candidate_pages = (candidate_count + 99) // 100

# Use the configured state in the label so it stays correct if `state` changes.
print(f"Total Candidates ({state}, {cycle}): {candidate_count:,}")
print(f"Pages (100 per page): {candidate_pages:,}")

In [None]:
# First, get candidate committees for the selected state and cycle.
# Every result page is walked: a single 100-row request would silently drop
# committees whenever more than 100 match the state/cycle filter.
candidate_committees = []
page = 1
while True:
    committees_response = client.get("/committees/", params={
        "state": state,
        "cycle": cycle,
        "per_page": 100,
        "page": page,
    })
    results = committees_response.get("results", [])

    # Filter for candidate committees (House/Senate, Principal/Authorized)
    for committee in results:
        committee_type = committee.get("committee_type")
        designation = committee.get("designation")

        if committee_type in ("H", "S") and designation in ("P", "A"):
            candidate_committees.append({
                "committee_id": committee.get("committee_id"),
                "name": committee.get("name"),
                "type": committee_type,
            })

    # Stop on the last reported page; an empty page also ends the walk so a
    # missing or inconsistent "pages" value cannot cause needless requests.
    total_pages = committees_response.get("pagination", {}).get("pages", 0)
    if not results or page >= total_pages:
        break
    page += 1

print(f"Found {len(candidate_committees)} candidate committees (H/S, P/A)")
print("\nSample committees:")
for comm in candidate_committees[:5]:
    print(f"  - {comm['committee_id']}: {comm['name']} ({comm['type']})")

In [None]:
import time
from requests.exceptions import ReadTimeout, HTTPError

# Check contribution counts for each committee.
# Each request asks for a single row (per_page=1) because only the pagination
# metadata is needed, not the contribution records themselves.
page_size = 100  # page size used for the page estimates reported below
total_contributions = 0
committee_details = []  # one dict per committee that was checked successfully
errors = []             # one dict per committee whose request failed

print(f"Checking {len(candidate_committees)} committees...\n")

for i, comm in enumerate(candidate_committees, 1):
    try:
        print(f"[{i}/{len(candidate_committees)}] Checking {comm['committee_id']}...", end=" ")

        response = client.get("/schedules/schedule_a/", params={
            "committee_id": comm["committee_id"],
            "two_year_transaction_period": cycle,
            "per_page": 1
        })

        count = response.get("pagination", {}).get("count", 0)
        # The response's own "pages" value is presumably relative to per_page=1
        # and would simply equal the count, so estimate pages for `page_size`
        # rows per page instead (integer ceiling division).
        pages = (count + page_size - 1) // page_size

        total_contributions += count

        committee_details.append({
            "committee_id": comm["committee_id"],
            "name": comm["name"],
            "type": comm["type"],
            "contributions": count,
            "pages": pages,
        })

        print(f"✓ {count:,} contributions ({pages:,} pages)")

    except ReadTimeout:
        print("⏱️  TIMEOUT - likely HUGE volume!")
        errors.append({
            "committee_id": comm["committee_id"],
            "name": comm["name"],
            "error": "ReadTimeout - potentially 10K+ contributions"
        })

    except HTTPError as e:
        print(f"❌ HTTP Error: {e}")
        errors.append({
            "committee_id": comm["committee_id"],
            "name": comm["name"],
            "error": str(e)
        })

    finally:
        # Small delay to avoid rate limiting; applied after failures as well,
        # so a run of errors does not turn into a burst of requests.
        time.sleep(0.1)

print(f"\n{'='*60}")
print(f"Successfully checked: {len(committee_details)} committees")
print(f"Errors/Timeouts: {len(errors)} committees")
print(f"TOTAL CONTRIBUTIONS (from successful checks): {total_contributions:,}")
print(f"{'='*60}")

if errors:
    # `errors` holds HTTP failures as well as timeouts, so the heading must
    # not claim that every listed committee timed out.
    print("\n⚠️  Committees that could not be checked (timeouts likely mean high volume):")
    for err in errors:
        print(f"  - {err['committee_id']}: {err['name']}")
        print(f"    {err['error']}")

In [None]:
if errors:
    # Imported once here rather than on every loop iteration.
    from datetime import date, timedelta

    print("Attempting to estimate volume for timed-out committees...\n")

    # The 30-day window is the same for every committee, so compute it once.
    end_date = date.today()
    start_date = end_date - timedelta(days=30)

    for err in errors:
        committee_id = err["committee_id"]
        print(f"Checking {committee_id} with date filters...")

        try:
            # Try getting just contributions from the last 30 days
            response = client.get("/schedules/schedule_a/", params={
                "committee_id": committee_id,
                "two_year_transaction_period": cycle,
                "min_date": start_date.isoformat(),
                "max_date": end_date.isoformat(),
                "per_page": 1
            })

            count_30days = response.get("pagination", {}).get("count", 0)
            print(f"  Last 30 days: {count_30days:,} contributions")
            # Rough extrapolation: twelve 30-day windows approximate one year.
            print(f"  Estimated annual rate: ~{count_30days * 12:,} contributions/year")

        except Exception as e:
            # Deliberately broad: this is a best-effort probe and one failing
            # committee should not stop the rest. The message reports the
            # actual error instead of assuming it was a timeout.
            print(f"  Request still failed even with date filter: {e}")

In [None]:
# Rank the successfully checked committees from most to fewest contributions.
committee_details_sorted = list(committee_details)
committee_details_sorted.sort(key=lambda entry: entry['contributions'], reverse=True)

print("\nTop 5 Committees by Contribution Volume:")
header = f"{'Committee':<15} {'Name':<40} {'Contributions':>15} {'Pages':>8}"
print(header)
print("-" * 80)
for comm in committee_details_sorted[:5]:
    # Names longer than the 40-character column are cut to 38 characters plus "..".
    if len(comm['name']) > 40:
        name_truncated = comm['name'][:38] + ".."
    else:
        name_truncated = comm['name']
    print(f"{comm['committee_id']:<15} {name_truncated:<40} {comm['contributions']:>15,} {comm['pages']:>8,}")

# Summary statistics; skipped when nothing was checked, which also avoids
# dividing by zero in the average.
if committee_details:
    contributions_list = [detail['contributions'] for detail in committee_details]
    max_contributions = max(contributions_list)
    avg_contributions = sum(contributions_list) / len(contributions_list)

    print("\nStatistics:")
    print(f"  Average contributions per committee: {avg_contributions:,.0f}")
    print(f"  Maximum contributions (single committee): {max_contributions:,}")
    pages_total = sum(detail['pages'] for detail in committee_details)
    print(f"  Total pages to fetch: {pages_total:,}")