In [None]:
from fund_lens_etl.flows.extract_fec_contributions import extract_fec_contributions_flow

# Optional: Check Prefect is installed
import prefect

import logging

# Root logger: emit INFO and above, with timestamp, logger name and level
# on every record.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Explicitly pin the FEC service logger to INFO as well, rather than relying
# on whatever level it would otherwise inherit.
fec_service_logger = logging.getLogger('fund_lens_etl.services.fec_service')
fec_service_logger.setLevel(logging.INFO)

# Reaching this line means the Prefect import above succeeded; show its version.
print(f"Prefect version: {prefect.__version__}")

In [None]:
# Smoke test: run the extraction flow for MD / 2024 with the result count
# capped at 500.
# noinspection PyArgumentEqualDefault
stats = extract_fec_contributions_flow(
    state="MD",
    two_year_transaction_period=2024,
    max_results=500,
)

# Report the run summary: one "label: value" line per stats entry, in a
# fixed order.
print("\n=== Extraction Statistics ===")
for field_label, stats_key in (
    ("State", "state"),
    ("Cycle", "cycle"),
    ("Contributions fetched", "contributions_fetched"),
    ("Contributions stored", "contributions_stored"),
    ("Raw filing ID", "raw_filing_id"),
):
    print(f"{field_label}: {stats[stats_key]}")
# Duration is printed on its own because it is formatted to two decimals.
print(f"Duration: {stats['duration_seconds']:.2f} seconds")

In [None]:
# Re-run for MD / 2024 with the cap doubled to 1000, so the fetch reaches
# past the first 500 records requested by the earlier run.
# noinspection PyArgumentEqualDefault
stats = extract_fec_contributions_flow(
    state="MD",
    two_year_transaction_period=2024,
    max_results=1000,
)

# Report the run summary: one "label: value" line per stats entry, in a
# fixed order.
print("\n=== Extraction Statistics ===")
for field_label, stats_key in (
    ("State", "state"),
    ("Cycle", "cycle"),
    ("Contributions fetched", "contributions_fetched"),
    ("Contributions stored", "contributions_stored"),
    ("Raw filing ID", "raw_filing_id"),
):
    print(f"{field_label}: {stats[stats_key]}")
# Duration is printed on its own because it is formatted to two decimals.
print(f"Duration: {stats['duration_seconds']:.2f} seconds")

In [None]:
# Run for MD / 2024 with the cap raised to 1500.
# Author's expectation: roughly 500 of these records are new, so
# "Contributions stored" should come out around 500.
# noinspection PyArgumentEqualDefault
stats = extract_fec_contributions_flow(
    state="MD",
    two_year_transaction_period=2024,
    max_results=1500,
)

# Report the run summary: one "label: value" line per stats entry, in a
# fixed order.
print("\n=== Extraction Statistics ===")
for field_label, stats_key in (
    ("State", "state"),
    ("Cycle", "cycle"),
    ("Contributions fetched", "contributions_fetched"),
    ("Contributions stored", "contributions_stored"),
    ("Raw filing ID", "raw_filing_id"),
):
    print(f"{field_label}: {stats[stats_key]}")
# Duration is printed on its own because it is formatted to two decimals.
print(f"Duration: {stats['duration_seconds']:.2f} seconds")

In [None]:
# Switch the target state to Virginia (same 2024 period, cap of 1000).
stats = extract_fec_contributions_flow(
    state="VA",
    two_year_transaction_period=2024,
    max_results=1000,
)

# Pull out the two counters first, then print them as a one-line summary.
fetched_count = stats['contributions_fetched']
stored_count = stats['contributions_stored']
print(f"\nVirginia: Fetched {fetched_count}, Stored {stored_count}")

In [None]:
# Run for MD / 2024 with the cap raised to 2000.
# Author's expectations: this run produces a NEW file hash, and
# "Contributions stored" should come out around 500.
# noinspection PyArgumentEqualDefault
stats = extract_fec_contributions_flow(
    state="MD",
    two_year_transaction_period=2024,
    max_results=2000,
)

# Report the run summary: one "label: value" line per stats entry, in a
# fixed order.
print("\n=== Extraction Statistics ===")
for field_label, stats_key in (
    ("State", "state"),
    ("Cycle", "cycle"),
    ("Contributions fetched", "contributions_fetched"),
    ("Contributions stored", "contributions_stored"),
    ("Raw filing ID", "raw_filing_id"),
):
    print(f"{field_label}: {stats[stats_key]}")
# Duration is printed on its own because it is formatted to two decimals.
print(f"Duration: {stats['duration_seconds']:.2f} seconds")

In [None]:
from sqlalchemy import create_engine, text
from fund_lens_etl.config import get_database_url

# Build an engine from the project's configured database URL.
engine = create_engine(get_database_url())

# Row-count statements for the two tables being inspected.
raw_filings_count_sql = text("SELECT COUNT(*) FROM raw_filings")
staging_count_sql = text("SELECT COUNT(*) FROM fec_contributions_staging")

# Run both counts on a single connection; the `with` block releases the
# connection when the cell finishes.
with engine.connect() as conn:
    # Number of rows in raw_filings
    raw_count = conn.execute(raw_filings_count_sql).scalar()
    print(f"Raw filings: {raw_count}")

    # Number of rows in fec_contributions_staging
    staging_count = conn.execute(staging_count_sql).scalar()
    print(f"Staging contributions: {staging_count}")