In [None]:
# Cell 1: Setup
# Enable IPython's autoreload extension in mode 2, so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import pandas as pd
from sqlalchemy import select, func

# Context-manager session factory; every cell below opens its own session with it.
from fund_lens_etl.database import get_db_session
# Bronze models: the source side of each transform.
from fund_lens_models.bronze import (
    BronzeMarylandCommittee,
    BronzeMarylandCandidate,
    BronzeMarylandContribution,
)
# Silver models: used below only to count and spot-check loaded rows.
from fund_lens_models.silver import (
    SilverMarylandCommittee,
    SilverMarylandCandidate,
    SilverMarylandContribution,
)
# Transformers: each exposes transform(bronze_df) and returns a silver DataFrame.
from fund_lens_etl.transformers import (
    BronzeToSilverMarylandCommitteeTransformer,
    BronzeToSilverMarylandCandidateTransformer,
    BronzeToSilverMarylandContributionTransformer,
)
# Loaders: each exposes load(session, silver_df); the return value is printed as a count.
from fund_lens_etl.loaders.silver import (
    SilverMarylandCommitteeLoader,
    SilverMarylandCandidateLoader,
    SilverMarylandContributionLoader,
)

# Reaching this line means every import above resolved.
print("Imports successful")

In [None]:
# Cell 2: Check Bronze Layer Counts
print("="*60)
print("Bronze Layer Record Counts")
print("="*60)

# One COUNT(*) per bronze table, issued in the same order as the report below
# and all inside a single session.
with get_db_session() as session:
    committee_count, candidate_count, contribution_count = [
        session.execute(select(func.count()).select_from(model)).scalar()
        for model in (
            BronzeMarylandCommittee,
            BronzeMarylandCandidate,
            BronzeMarylandContribution,
        )
    ]

# Report after the session has been closed; only plain integers are needed here.
print(f"  bronze_md_committee:    {committee_count:,} records")
print(f"  bronze_md_candidate:    {candidate_count:,} records")
print(f"  bronze_md_contribution: {contribution_count:,} records")

In [None]:
# Cell 3: Transform and Load Committees
print("="*60)
print("TEST 1: Transform Committees (Bronze -> Silver)")
print("="*60)

with get_db_session() as session:
    # Get sample Bronze committees.
    # NOTE(review): the query has no ORDER BY, so which 50 rows are returned
    # is left to the database and may differ between runs.
    stmt = select(BronzeMarylandCommittee).limit(50)
    bronze_committees = session.execute(stmt).scalars().all()

    print(f"\nFetched {len(bronze_committees)} bronze committees")

    # Columns handed to the transformer: every table column except the
    # surrogate key and the bookkeeping columns listed here.
    exclude_cols = {"id", "created_at", "updated_at", "ingestion_timestamp", "source_system"}
    bronze_cols = [
        col.name for col in BronzeMarylandCommittee.__table__.columns.values()
        if col.name not in exclude_cols
    ]

    # Convert to DataFrame. Passing `columns=` keeps the expected schema even
    # when the bronze table is empty; without it an empty sample produces a
    # DataFrame with no columns at all.
    bronze_df = pd.DataFrame(
        [{col: getattr(c, col) for col in bronze_cols} for c in bronze_committees],
        columns=bronze_cols,
    )

    print(f"Bronze columns: {bronze_df.columns.tolist()}")

    # Transform
    transformer = BronzeToSilverMarylandCommitteeTransformer()
    silver_df = transformer.transform(bronze_df)

    print(f"\nTransformed to {len(silver_df)} silver records")
    print(f"Silver columns: {silver_df.columns.tolist()}")

    # Show sample
    print("\nSample transformed data:")
    print(silver_df[['source_ccf_id', 'name', 'committee_type', 'status', 'is_active']].head())

In [None]:
# Cell 4: Load Committees to Silver
print("="*60)
print("Load Committees to Silver")
print("="*60)

# `silver_df` is reassigned by every transform cell in this notebook (Cells 3,
# 5 and 7). Refuse to load if it does not look like committee output, so that
# running cells out of order cannot hand another entity's frame to this loader.
# The columns checked are ones Cell 3 itself selects from the committee frame.
required_cols = {"source_ccf_id", "committee_type"}
missing_cols = required_cols - set(silver_df.columns)
if missing_cols:
    raise ValueError(
        f"silver_df is missing committee columns {sorted(missing_cols)}; "
        "re-run Cell 3 (committee transform) immediately before this cell."
    )

with get_db_session() as session:
    loader = SilverMarylandCommitteeLoader()
    loaded = loader.load(session, silver_df)
    print(f"\nLoaded {loaded} committees to silver")

    # Verify: total rows now visible in the silver committee table
    # (the whole table, not just this load).
    count = session.execute(
        select(func.count()).select_from(SilverMarylandCommittee)
    ).scalar()
    print(f"Total silver_md_committee records: {count}")

In [None]:
# Cell 5: Transform and Load Candidates
print("="*60)
print("TEST 2: Transform Candidates (Bronze -> Silver)")
print("="*60)

with get_db_session() as session:
    # Get sample Bronze candidates.
    # NOTE(review): the query has no ORDER BY, so which 100 rows are returned
    # is left to the database and may differ between runs.
    stmt = select(BronzeMarylandCandidate).limit(100)
    bronze_candidates = session.execute(stmt).scalars().all()

    print(f"\nFetched {len(bronze_candidates)} bronze candidates")

    # Columns handed to the transformer: every table column except the
    # surrogate key and the bookkeeping columns listed here.
    exclude_cols = {"id", "created_at", "updated_at", "ingestion_timestamp", "source_system"}
    bronze_cols = [
        col.name for col in BronzeMarylandCandidate.__table__.columns.values()
        if col.name not in exclude_cols
    ]

    # Convert to DataFrame. Passing `columns=` keeps the expected schema even
    # when the bronze table is empty; without it an empty sample produces a
    # DataFrame with no columns at all.
    bronze_df = pd.DataFrame(
        [{col: getattr(c, col) for col in bronze_cols} for c in bronze_candidates],
        columns=bronze_cols,
    )

    # Transform (with session for committee enrichment)
    transformer = BronzeToSilverMarylandCandidateTransformer(session=session)
    silver_df = transformer.transform(bronze_df)

    print(f"\nTransformed to {len(silver_df)} silver records")
    print(f"Silver columns: {silver_df.columns.tolist()}")

    # Show sample
    print("\nSample transformed data:")
    print(silver_df[['name', 'party', 'office', 'status', 'is_active', 'committee_ccf_id']].head())

    # Check committee enrichment: how many candidates ended up with a
    # non-null committee_ccf_id after the transform.
    enriched_count = silver_df['committee_ccf_id'].notna().sum()
    print(f"\nCandidates with committee_ccf_id: {enriched_count}/{len(silver_df)}")

In [None]:
# Cell 6: Load Candidates to Silver
print("="*60)
print("Load Candidates to Silver")
print("="*60)

# `silver_df` is reassigned by every transform cell in this notebook (Cells 3,
# 5 and 7). Refuse to load if it does not look like candidate output, so that
# running cells out of order cannot hand another entity's frame to this loader.
# The columns checked are ones Cell 5 itself selects from the candidate frame.
required_cols = {"party", "office"}
missing_cols = required_cols - set(silver_df.columns)
if missing_cols:
    raise ValueError(
        f"silver_df is missing candidate columns {sorted(missing_cols)}; "
        "re-run Cell 5 (candidate transform) immediately before this cell."
    )

with get_db_session() as session:
    loader = SilverMarylandCandidateLoader()
    loaded = loader.load(session, silver_df)
    print(f"\nLoaded {loaded} candidates to silver")

    # Verify: total rows now visible in the silver candidate table
    # (the whole table, not just this load).
    count = session.execute(
        select(func.count()).select_from(SilverMarylandCandidate)
    ).scalar()
    print(f"Total silver_md_candidate records: {count}")

In [None]:
# Cell 7: Transform and Load Contributions
print("="*60)
print("TEST 3: Transform Contributions (Bronze -> Silver)")
print("="*60)

with get_db_session() as session:
    # Get sample Bronze contributions.
    # NOTE(review): the query has no ORDER BY, so which 100 rows are returned
    # is left to the database and may differ between runs.
    stmt = select(BronzeMarylandContribution).limit(100)
    bronze_contributions = session.execute(stmt).scalars().all()

    print(f"\nFetched {len(bronze_contributions)} bronze contributions")

    # Columns handed to the transformer: every table column except the
    # surrogate key and the bookkeeping columns listed here.
    exclude_cols = {"id", "created_at", "updated_at", "ingestion_timestamp", "source_system"}
    bronze_cols = [
        col.name for col in BronzeMarylandContribution.__table__.columns.values()
        if col.name not in exclude_cols
    ]

    # Convert to DataFrame. Passing `columns=` keeps the expected schema even
    # when the bronze table is empty; without it the column selection just
    # below raises KeyError on an empty sample.
    bronze_df = pd.DataFrame(
        [{col: getattr(c, col) for col in bronze_cols} for c in bronze_contributions],
        columns=bronze_cols,
    )

    print("\nBronze sample (before transform):")
    print(bronze_df[['receiving_committee', 'contribution_date', 'contribution_amount', 'contributor_address']].head())

    # Transform (with session for committee enrichment)
    transformer = BronzeToSilverMarylandContributionTransformer(session=session)
    silver_df = transformer.transform(bronze_df)

    print(f"\nTransformed to {len(silver_df)} silver records")

    # Show transformed data with parsed fields
    print("\nSilver sample (after transform):")
    print(silver_df[['committee_name', 'contribution_date', 'contribution_amount', 'contributor_city', 'contributor_state', 'contributor_zip']].head())

    # Check enrichment and parsing: for each derived column, report how many
    # silver rows carry a non-null value.
    print("\nParsing statistics:")
    for column, outcome in (
        ("contribution_date", "parsed"),
        ("contributor_city", "parsed"),
        ("contributor_state", "parsed"),
        ("contributor_zip", "parsed"),
        ("committee_ccf_id", "enriched"),
    ):
        print(f"  {column} {outcome}: {silver_df[column].notna().sum()}/{len(silver_df)}")

In [None]:
# Cell 8: Load Contributions to Silver
print("="*60)
print("Load Contributions to Silver")
print("="*60)

# `silver_df` is reassigned by every transform cell in this notebook (Cells 3,
# 5 and 7). Refuse to load if it does not look like contribution output, so
# that running cells out of order cannot hand another entity's frame to this
# loader. The columns checked are ones Cell 7 itself selects from the
# contribution frame.
required_cols = {"contribution_amount", "contribution_date"}
missing_cols = required_cols - set(silver_df.columns)
if missing_cols:
    raise ValueError(
        f"silver_df is missing contribution columns {sorted(missing_cols)}; "
        "re-run Cell 7 (contribution transform) immediately before this cell."
    )

with get_db_session() as session:
    loader = SilverMarylandContributionLoader()
    loaded = loader.load(session, silver_df)
    print(f"\nLoaded {loaded} contributions to silver")

    # Verify: total rows now visible in the silver contribution table
    # (the whole table, not just this load).
    count = session.execute(
        select(func.count()).select_from(SilverMarylandContribution)
    ).scalar()
    print(f"Total silver_md_contribution records: {count}")

In [None]:
# Cell 9: Verify Silver Layer Data
print("="*60)
print("Verify Silver Layer Data")
print("="*60)


def _clip(text, width):
    """Shorten a value for one-line display.

    Args:
        text: Value to display; may be None.
        width: Maximum number of characters to keep.

    Returns:
        "<missing>" when `text` is None; otherwise `str(text)`, cut to `width`
        characters with "..." appended only when something was actually cut.
    """
    if text is None:
        # Slicing None would raise TypeError and abort the whole spot-check.
        # NOTE(review): whether these silver columns are nullable is not
        # visible from this notebook; this is defensive.
        return "<missing>"
    text = str(text)
    return text if len(text) <= width else f"{text[:width]}..."


with get_db_session() as session:
    # Check committees
    print("\nSilver Committees:")
    stmt = select(SilverMarylandCommittee).limit(3)
    for comm in session.execute(stmt).scalars():
        print(f"  {_clip(comm.name, 40)} (CCF: {comm.source_ccf_id}, Active: {comm.is_active})")

    # Check candidates
    print("\nSilver Candidates:")
    stmt = select(SilverMarylandCandidate).limit(3)
    for cand in session.execute(stmt).scalars():
        print(f"  {cand.name} - {cand.office} ({cand.party}, Active: {cand.is_active})")

    # Check contributions
    print("\nSilver Contributions:")
    stmt = select(SilverMarylandContribution).limit(3)
    for contrib in session.execute(stmt).scalars():
        print(f"  ${contrib.contribution_amount} from {_clip(contrib.contributor_name, 30)} on {contrib.contribution_date}")
        print(f"    -> {_clip(contrib.committee_name, 40)} (City: {contrib.contributor_city}, State: {contrib.contributor_state})")

In [None]:
# Cell 10: Summary Statistics
print("="*60)
print("SUMMARY: Maryland Silver Layer")
print("="*60)

# Gather COUNT(*) for every bronze table, then every silver table, within a
# single session. Query order matches the original: bronze first, then silver.
with get_db_session() as session:
    bronze_comm, bronze_cand, bronze_contrib = [
        session.execute(select(func.count()).select_from(model)).scalar()
        for model in (
            BronzeMarylandCommittee,
            BronzeMarylandCandidate,
            BronzeMarylandContribution,
        )
    ]
    silver_comm, silver_cand, silver_contrib = [
        session.execute(select(func.count()).select_from(model)).scalar()
        for model in (
            SilverMarylandCommittee,
            SilverMarylandCandidate,
            SilverMarylandContribution,
        )
    ]

# Side-by-side table: one row per entity, then a totals row.
print("\n{:<25} {:>15} {:>15}".format("Entity", "Bronze", "Silver"))
print("-" * 55)
for entity, bronze_total, silver_total in (
    ("Committees", bronze_comm, silver_comm),
    ("Candidates", bronze_cand, silver_cand),
    ("Contributions", bronze_contrib, silver_contrib),
):
    print("{:<25} {:>15,} {:>15,}".format(entity, bronze_total, silver_total))
print("-" * 55)
print("{:<25} {:>15,} {:>15,}".format(
    "TOTAL",
    bronze_comm + bronze_cand + bronze_contrib,
    silver_comm + silver_cand + silver_contrib,
))

In [None]:
# Cell 11: (Optional) Run Full Silver Transformation Flow
# Uncomment to run the complete Prefect flow

# from fund_lens_etl.flows import maryland_silver_transformation_flow
# 
# result = maryland_silver_transformation_flow()
# print("\nFlow Result:")
# print(result)