In [None]:
# Cell 1: Setup
# Load IPython's autoreload extension in mode 2 so that edits to the imported
# project modules are picked up when later cells are re-run.
%load_ext autoreload
%autoreload 2

# NOTE(review): `pd` is not referenced by any cell visible in this notebook.
import pandas as pd
from sqlalchemy import select, func, text

# Context manager that yields the database session used by every query cell.
from fund_lens_etl.database import get_db_session
# Maryland Silver-layer models; their row counts are reported in Cell 2.
from fund_lens_models.silver import (
    SilverMarylandCommittee,
    SilverMarylandCandidate,
    SilverMarylandContribution,
)
# Gold-layer models; queried before and after the transformation flow.
from fund_lens_models.gold import (
    GoldContributor,
    GoldCommittee,
    GoldCandidate,
    GoldContribution,
)

# Printed only if every import above succeeded.
print("Imports successful")

In [None]:
# Cell 2: Check Silver Layer Counts
# Report how many rows each Maryland Silver table currently holds.
print("=" * 60, "Maryland Silver Layer Record Counts", "=" * 60, sep="\n")

# Models are listed in the order their counts are queried and unpacked below.
silver_models = (
    SilverMarylandCommittee,
    SilverMarylandCandidate,
    SilverMarylandContribution,
)

with get_db_session() as session:
    # One SELECT COUNT(*) per model, executed inside the same session.
    committee_count, candidate_count, contribution_count = [
        session.execute(select(func.count()).select_from(model)).scalar()
        for model in silver_models
    ]

print(f"  silver_md_committee:    {committee_count:,} records")
print(f"  silver_md_candidate:    {candidate_count:,} records")
print(f"  silver_md_contribution: {contribution_count:,} records")

In [None]:
# Cell 3: Check Gold Layer Counts (Before)
# Baseline snapshot of the Gold tables taken before the Maryland transform runs.
# It reports the same totals and the same MD breakdowns as the "After" snapshot
# in Cell 5 so the two outputs can be compared line for line.
print("="*60)
print("Gold Layer Record Counts (Before MD Transform)")
print("="*60)

with get_db_session() as session:
    # Overall row counts per Gold table.
    contributor_count = session.execute(
        select(func.count()).select_from(GoldContributor)
    ).scalar()

    committee_count = session.execute(
        select(func.count()).select_from(GoldCommittee)
    ).scalar()

    candidate_count = session.execute(
        select(func.count()).select_from(GoldCandidate)
    ).scalar()

    contribution_count = session.execute(
        select(func.count()).select_from(GoldContribution)
    ).scalar()

    # Check MD-specific counts
    # NOTE(review): committees and candidates are counted as "MD" when their
    # state_*_id column is non-null; confirm no other state source fills them.
    md_committee_count = session.execute(
        select(func.count()).select_from(GoldCommittee)
        .where(GoldCommittee.state_committee_id.isnot(None))
    ).scalar()

    # Previously missing from the baseline, which made the candidate line
    # impossible to compare against the "After" output.
    md_candidate_count = session.execute(
        select(func.count()).select_from(GoldCandidate)
        .where(GoldCandidate.state_candidate_id.isnot(None))
    ).scalar()

    md_contribution_count = session.execute(
        select(func.count()).select_from(GoldContribution)
        .where(GoldContribution.source_system == 'MARYLAND')
    ).scalar()

print(f"  gold_contributor:   {contributor_count:,} records")
print(f"  gold_committee:     {committee_count:,} records (MD: {md_committee_count:,})")
print(f"  gold_candidate:     {candidate_count:,} records (MD: {md_candidate_count:,})")
print(f"  gold_contribution:  {contribution_count:,} records (MD: {md_contribution_count:,})")

In [None]:
# Cell 4: Run Maryland Gold Transformation Flow
# Invoke the flow once with a chunk size of 5000 and report its success flag.
print("=" * 60, "Running Maryland Gold Transformation Flow", "=" * 60, sep="\n")

# Imported here rather than in the setup cell, as in the original notebook.
from fund_lens_etl.flows import maryland_gold_transformation_flow

result = maryland_gold_transformation_flow(chunksize=5000)

print("\nFlow Result:")
# The flow result is indexed like a mapping; only its 'success' entry is shown.
flow_succeeded = result['success']
print(f"  Success: {flow_succeeded}")

In [None]:
# Cell 5: Check Gold Layer Counts (After)
# Same snapshot as the "Before" cell, taken after the Maryland flow has run.
print("=" * 60, "Gold Layer Record Counts (After MD Transform)", "=" * 60, sep="\n")

# Whole-table counts, in the order they are unpacked below.
table_count_stmts = [
    select(func.count()).select_from(model)
    for model in (GoldContributor, GoldCommittee, GoldCandidate, GoldContribution)
]

# Check MD-specific counts
md_count_stmts = [
    select(func.count())
    .select_from(GoldCommittee)
    .where(GoldCommittee.state_committee_id.isnot(None)),
    select(func.count())
    .select_from(GoldCandidate)
    .where(GoldCandidate.state_candidate_id.isnot(None)),
    select(func.count())
    .select_from(GoldContribution)
    .where(GoldContribution.source_system == 'MARYLAND'),
]

with get_db_session() as session:
    contributor_count, committee_count, candidate_count, contribution_count = [
        session.execute(stmt).scalar() for stmt in table_count_stmts
    ]
    md_committee_count, md_candidate_count, md_contribution_count = [
        session.execute(stmt).scalar() for stmt in md_count_stmts
    ]

print(f"  gold_contributor:   {contributor_count:,} records")
print(f"  gold_committee:     {committee_count:,} records (MD: {md_committee_count:,})")
print(f"  gold_candidate:     {candidate_count:,} records (MD: {md_candidate_count:,})")
print(f"  gold_contribution:  {contribution_count:,} records (MD: {md_contribution_count:,})")

In [None]:
# Cell 6: Verify Maryland Data in Gold Layer
# Print up to five sample committees, candidates and contributions that the
# queries below identify as Maryland rows. No ORDER BY is applied, so which
# rows are shown is up to the database.
print("="*60)
print("Sample Maryland Data in Gold Layer")
print("="*60)

# Longest committee name printed in full; longer names are cut and marked "...".
MAX_COMMITTEE_NAME_LEN = 50

with get_db_session() as session:
    # Sample committees
    print("\nMaryland Committees:")
    stmt = select(GoldCommittee).where(GoldCommittee.state_committee_id.isnot(None)).limit(5)
    for comm in session.execute(stmt).scalars():
        # Guard against a NULL name, and only add the ellipsis when something
        # was actually cut off (it used to be appended unconditionally).
        display_name = comm.name or ""
        if len(display_name) > MAX_COMMITTEE_NAME_LEN:
            display_name = display_name[:MAX_COMMITTEE_NAME_LEN] + "..."
        print(f"  {display_name} (CCF: {comm.state_committee_id}, Type: {comm.committee_type})")

    # Sample candidates
    print("\nMaryland Candidates:")
    stmt = select(GoldCandidate).where(GoldCandidate.state_candidate_id.isnot(None)).limit(5)
    for cand in session.execute(stmt).scalars():
        print(f"  {cand.name} - {cand.office} ({cand.party})")

    # Sample contributions
    print("\nMaryland Contributions:")
    stmt = select(GoldContribution).where(GoldContribution.source_system == 'MARYLAND').limit(5)
    for contrib in session.execute(stmt).scalars():
        print(f"  ${contrib.amount} on {contrib.contribution_date} (Committee ID: {contrib.recipient_committee_id})")

In [None]:
# Cell 7: Maryland Contribution Statistics
# Aggregate statistics over gold_contribution rows whose source_system is
# 'MARYLAND': overall totals first, then a top-10 breakdown by contribution type.
print("="*60)
print("Maryland Contribution Statistics")
print("="*60)

with get_db_session() as session:
    # Total amount
    total_result = session.execute(
        text("""
        SELECT 
            COUNT(*) as count,
            SUM(amount) as total,
            AVG(amount) as avg,
            MIN(contribution_date) as min_date,
            MAX(contribution_date) as max_date
        FROM gold_contribution
        WHERE source_system = 'MARYLAND'
        """)
    ).fetchone()

    if total_result:
        row_count, total_amount, avg_amount, min_date, max_date = total_result
        print(f"\n  Total Contributions: {row_count:,}")
        # SUM/AVG are NULL when no row matches; test for None explicitly so a
        # genuine zero is still formatted as a number.
        if total_amount is not None:
            print(f"  Total Amount: ${total_amount:,.2f}")
        else:
            print("  Total Amount: $0")
        if avg_amount is not None:
            print(f"  Average Amount: ${avg_amount:,.2f}")
        else:
            print("  Average Amount: $0")
        print(f"  Date Range: {min_date} to {max_date}")

    # By contribution type
    print("\n  By Contribution Type:")
    # COALESCE turns an all-NULL group's SUM into 0: the row can then be
    # formatted with :,.2f, and it no longer sorts ahead of real totals on
    # backends that place NULLs first in DESC order (e.g. PostgreSQL).
    type_result = session.execute(
        text("""
        SELECT contribution_type, COUNT(*), COALESCE(SUM(amount), 0)
        FROM gold_contribution
        WHERE source_system = 'MARYLAND'
        GROUP BY contribution_type
        ORDER BY COALESCE(SUM(amount), 0) DESC
        LIMIT 10
        """)
    ).fetchall()

    for type_label, type_row_count, type_total in type_result:
        print(f"    {type_label}: {type_row_count:,} contributions (${type_total:,.2f})")

In [None]:
# Cell 8: Top Maryland Recipients
# Rank recipient committees by the summed amount of their 'MARYLAND'
# contributions and print the top ten as a fixed-width table.
print("="*60)
print("Top 10 Maryland Recipients by Total Contributions")
print("="*60)

with get_db_session() as session:
    # COALESCE keeps a committee whose amounts are all NULL from producing a
    # NULL total, which would break the numeric formatting below and sort ahead
    # of real totals on backends that place NULLs first in DESC order.
    result = session.execute(
        text("""
        SELECT 
            c.name,
            c.committee_type,
            COUNT(gc.id) as contribution_count,
            COALESCE(SUM(gc.amount), 0) as total_amount
        FROM gold_contribution gc
        JOIN gold_committee c ON gc.recipient_committee_id = c.id
        WHERE gc.source_system = 'MARYLAND'
        GROUP BY c.id, c.name, c.committee_type
        ORDER BY COALESCE(SUM(gc.amount), 0) DESC
        LIMIT 10
        """)
    ).fetchall()

    print("\n{:<50} {:>12} {:>15}".format("Committee", "Count", "Total"))
    print("-" * 80)
    for recipient_name, _recipient_type, n_contributions, amount_total in result:
        # A NULL committee name would make len() raise; show a placeholder.
        if recipient_name is None:
            recipient_name = "(unnamed committee)"
        # Names longer than the 50-character column are cut to 47 chars + "...".
        if len(recipient_name) > 50:
            recipient_name = recipient_name[:47] + "..."
        print("{:<50} {:>12,} ${:>14,.2f}".format(recipient_name, n_contributions, amount_total))

In [None]:
# Cell 9: Summary
# Static recap of how Maryland records are represented in the Gold layer.
# NOTE(review): this text is printed unconditionally; it does not inspect the
# flow result or any of the counts computed in earlier cells.
print("="*60)
print("SUMMARY: Maryland Gold Transformation")
print("="*60)

# The check mark was stored as mojibake (UTF-8 bytes decoded as cp1252);
# restored to the intended U+2713 character.
print("\n✓ Maryland data successfully integrated into unified Gold layer")
print("\nKey points:")
print("  - Committees stored with state_committee_id = CCF ID")
print("  - Candidates stored with state_candidate_id = content_hash")
print("  - Contributions stored with source_system = 'MARYLAND'")
print("  - Contributors deduplicated and merged with FEC contributors")
print("\nThe Gold layer now contains both FEC and Maryland data!")