In [None]:
# Cell 1: Setup
# Load IPython's autoreload extension; mode 2 picks up edits to imported
# project modules live, so ETL code can be changed without restarting the kernel.
%load_ext autoreload
%autoreload 2

from datetime import date, timedelta

import pandas as pd
from sqlalchemy import func, select

from fund_lens_etl.clients.maryland import MarylandCRISClient, MarylandSBEClient
from fund_lens_etl.database import get_db_session
from fund_lens_etl.extractors.maryland import (
    MarylandContributionExtractor,
    MarylandCommitteeExtractor,
    MarylandCandidateExtractor,
)
from fund_lens_etl.loaders.bronze import (
    BronzeMarylandContributionLoader,
    BronzeMarylandCommitteeLoader,
    BronzeMarylandCandidateLoader,
)
from fund_lens_models.bronze import (
    BronzeMarylandContribution,
    BronzeMarylandCommittee,
    BronzeMarylandCandidate,
)

# Marker output: this line is only reached when every import above resolved.
print("✓ Imports successful")

In [None]:
# Cell 2: Test Committee Extract + Load
# Smoke test: extract committees with status "A" through the CRIS client,
# then load a small sample into the bronze layer.
print("=" * 60, "TEST 1: Extract & Load Committees", "=" * 60, sep="\n")

# --- Extract -----------------------------------------------------------------
# cris_client is created here and reused by the contribution cell further down.
cris_client = MarylandCRISClient()
committee_extractor = MarylandCommitteeExtractor(client=cris_client)
committee_df = committee_extractor.extract(status="A")

print(
    f"Extracted {len(committee_df)} active committees",
    f"Columns: {committee_df.columns.tolist()}",
    sep="\n",
)

# --- Load --------------------------------------------------------------------
# Only the first 20 rows are loaded to keep the test quick; test_df and
# committee_loader are reused by the upsert check later in the notebook.
sample_size = 20
test_df = committee_df.head(sample_size)
committee_loader = BronzeMarylandCommitteeLoader()
with get_db_session() as session:
    loaded = committee_loader.load(session, test_df)
    print(f"\n✓ Loaded {loaded} committees to bronze")

In [None]:
# Cell 3: Verify Committees in Database
# Read back a small sample of bronze committee rows and print their key fields.
# `select` and `BronzeMarylandCommittee` come from the imports in the setup
# cell, so no import statements execute while the database session is open and
# later cells no longer depend on this cell having been run first.
print("="*60)
print("Verify Committees in Database")
print("="*60)

with get_db_session() as session:
    # LIMIT 5 with no ORDER BY: which rows are returned is left to the database.
    stmt = select(BronzeMarylandCommittee).limit(5)
    committees = session.execute(stmt).scalars().all()

    # The number printed is the size of this sample (at most 5), not the
    # table's total row count.
    print(f"Found {len(committees)} committees in database")
    for committee in committees:
        print(f"\n  {committee.committee_name}")
        print(f"    CCF ID: {committee.ccf_id}")
        print(f"    Type: {committee.committee_type}")
        print(f"    Status: {committee.committee_status}")
        print(f"    Registered: {committee.registered_date}")

In [None]:
# Cell 4: Test Contribution Extract + Load
# Smoke test: extract contributions for the trailing 30-day window and load
# every extracted row into the bronze layer.
print(
    "=" * 60,
    "TEST 2: Extract & Load Contributions (Last 30 Days)",
    "=" * 60,
    sep="\n",
)

# --- Extract -----------------------------------------------------------------
# Reuses the CRIS client created in the committee cell.
contribution_extractor = MarylandContributionExtractor(client=cris_client)

# The window ends today and reaches back 30 days.
lookback = timedelta(days=30)
end_date = date.today()
start_date = end_date - lookback

print(f"Date range: {start_date} to {end_date}")

contribution_df = contribution_extractor.extract(start_date=start_date, end_date=end_date)

print(f"Extracted {len(contribution_df)} contributions")

# --- Load --------------------------------------------------------------------
# The full frame is loaded; a 30-day window is expected to be small enough.
contribution_loader = BronzeMarylandContributionLoader()
with get_db_session() as session:
    loaded = contribution_loader.load(session, contribution_df)
    print(f"\n✓ Loaded {loaded} contributions to bronze")

In [None]:
# Cell 5: Verify Contributions in Database
# Read back a small sample of bronze contribution rows and print their key
# fields. `select` and `BronzeMarylandContribution` come from the imports in
# the setup cell; previously `select` was only in scope if the committee
# verification cell had already been executed.
print("="*60)
print("Verify Contributions in Database")
print("="*60)

# Longest committee name printed in full before it is cut and marked with "...".
max_committee_chars = 40

with get_db_session() as session:
    # LIMIT 5 with no ORDER BY: which rows are returned is left to the database.
    stmt = select(BronzeMarylandContribution).limit(5)
    contributions = session.execute(stmt).scalars().all()

    # The number printed is the size of this sample (at most 5), not the
    # table's total row count.
    print(f"Found {len(contributions)} contributions in database")
    for contrib in contributions:
        # str() keeps a missing value printable ("None", like the other fields)
        # instead of raising TypeError on slicing.
        # NOTE(review): assumes receiving_committee can be NULL in bronze - confirm.
        committee_label = str(contrib.receiving_committee)
        # Append the ellipsis only when the name was actually shortened.
        if len(committee_label) > max_committee_chars:
            committee_label = committee_label[:max_committee_chars] + "..."

        print(f"\n  {contrib.contributor_name}")
        print(f"    Committee: {committee_label}")
        print(f"    Amount: ${contrib.contribution_amount}")
        print(f"    Date: {contrib.contribution_date}")
        print(f"    Type: {contrib.contribution_type}")

In [None]:
# Cell 6: Test Candidate Extract + Load
# Smoke test: extract the 2026 candidate list through the SBE client and load
# every extracted row into the bronze layer.
print("=" * 60, "TEST 3: Extract & Load Candidates (2026)", "=" * 60, sep="\n")

# --- Extract -----------------------------------------------------------------
# Candidates use the SBE client rather than the CRIS client used above.
sbe_client = MarylandSBEClient()
candidate_extractor = MarylandCandidateExtractor(client=sbe_client)
election_year = 2026
candidate_df = candidate_extractor.extract(year=election_year)

print(f"Extracted {len(candidate_df)} candidates")

# --- Load --------------------------------------------------------------------
# The full frame is loaded; no sampling is applied here.
candidate_loader = BronzeMarylandCandidateLoader()
with get_db_session() as session:
    loaded = candidate_loader.load(session, candidate_df)
    print(f"\n✓ Loaded {loaded} candidates to bronze")

In [None]:
# Cell 7: Verify Candidates in Database
# Read back a small sample of bronze candidate rows and print their key fields.
# `select` and `BronzeMarylandCandidate` come from the imports in the setup
# cell; previously `select` was only in scope if the committee verification
# cell had already been executed.
print("="*60)
print("Verify Candidates in Database")
print("="*60)

with get_db_session() as session:
    # LIMIT 5 with no ORDER BY: which rows are returned is left to the database.
    stmt = select(BronzeMarylandCandidate).limit(5)
    candidates = session.execute(stmt).scalars().all()

    # The number printed is the size of this sample (at most 5), not the
    # table's total row count.
    print(f"Found {len(candidates)} candidates in database")
    for candidate in candidates:
        print(f"\n  {candidate.candidate_first_name} {candidate.candidate_last_name}")
        print(f"    Office: {candidate.office_name}")
        print(f"    Party: {candidate.party}")
        print(f"    Status: {candidate.status}")
        print(f"    Election: {candidate.election_year} {candidate.election_type}")

In [None]:
# Cell 8: Test Upsert (Re-load Same Data)
# Re-loads the committee sample from Cell 2 and compares the table's row count
# before and after; an unchanged count means the re-load created no new rows.
# `select`, `func` and `BronzeMarylandCommittee` come from the imports in the
# setup cell; `committee_loader` and `test_df` are defined in Cell 2.
print("="*60)
print("TEST 4: Verify Upsert Behavior (Re-load Same Data)")
print("="*60)

# A single COUNT(*) statement, built once and executed for both snapshots.
committee_count_stmt = select(func.count()).select_from(BronzeMarylandCommittee)

# Count before
with get_db_session() as session:
    count_before = session.execute(committee_count_stmt).scalar()
    print(f"Committees before re-load: {count_before}")

# Re-load same committees (runs in its own session block)
with get_db_session() as session:
    loaded = committee_loader.load(session, test_df)
    print(f"Re-loaded: {loaded} committees")

# Count after (read in a fresh session, once the re-load block has exited)
with get_db_session() as session:
    count_after = session.execute(committee_count_stmt).scalar()
    print(f"Committees after re-load: {count_after}")

# Only the row count is compared; column values of the re-loaded rows are not.
if count_before == count_after:
    print("\n✓ Upsert working correctly - no duplicates created!")
else:
    print(f"\n✗ Warning: Count changed from {count_before} to {count_after}")

In [None]:
# Cell 9: Summary Statistics
# Print the row count of each Maryland bronze table plus their combined total.
# `select`, `func` and the three bronze models come from the imports in the
# setup cell; previously this cell re-imported everything except `select`,
# which was only in scope if the committee verification cell had been executed.
print("="*60)
print("SUMMARY: Maryland Bronze Tables")
print("="*60)

with get_db_session() as session:
    # One COUNT(*) query per table, all issued within the same session.
    committee_count = session.execute(
        select(func.count()).select_from(BronzeMarylandCommittee)
    ).scalar()

    contribution_count = session.execute(
        select(func.count()).select_from(BronzeMarylandContribution)
    ).scalar()

    candidate_count = session.execute(
        select(func.count()).select_from(BronzeMarylandCandidate)
    ).scalar()

    total_count = committee_count + contribution_count + candidate_count

    # The ":," format spec adds thousands separators to each count.
    print(f"\n  bronze_md_committee:    {committee_count:,} records")
    print(f"  bronze_md_contribution: {contribution_count:,} records")
    print(f"  bronze_md_candidate:    {candidate_count:,} records")
    print(f"\n  Total:                  {total_count:,} records")