In [None]:
# Cell 1: Setup
# Load IPython's autoreload extension and switch it to mode 2 so that edits
# to the imported project modules are picked up live, without a kernel restart.
%load_ext autoreload
%autoreload 2

# pandas for the DataFrames, the project's session factory, the bronze
# Schedule A ORM model, the bronze -> silver transformer, and SQLAlchemy's
# select() used to build the bronze query.
import pandas as pd
from fund_lens_etl.database import get_db_session
from fund_lens_etl.models.bronze.fec import BronzeFECScheduleA
from fund_lens_etl.transformers.bronze_to_silver import BronzeToSilverFECTransformer
from sqlalchemy import select

# Only reached when every import above succeeded.
print("✓ Imports successful")

In [None]:
# Cell 2: Load Bronze Data
# Reads every BronzeFECScheduleA row and copies a fixed set of attributes
# into `bronze_df`, which the transform cell consumes.
print("="*60)
print("Load Bronze Schedule A Data")
print("="*60)

# Attributes copied from each bronze row, in output column order.
# The same list is passed to pd.DataFrame(columns=...) so that an empty
# bronze table still produces a frame with these columns; without it the
# frame has no columns at all and the sample selection below raises KeyError.
BRONZE_COLUMNS = [
    'sub_id',
    'transaction_id',
    'file_number',
    'contribution_receipt_date',
    'contribution_receipt_amount',
    'contributor_aggregate_ytd',
    'contributor_name',
    'contributor_first_name',
    'contributor_last_name',
    'contributor_city',
    'contributor_state',
    'contributor_zip',
    'contributor_employer',
    'contributor_occupation',
    'entity_type',
    'committee_id',
    'recipient_committee_designation',
    'receipt_type',
    'election_type',
    'memo_text',
    'two_year_transaction_period',
    'report_year',
    'raw_json',
]

with get_db_session() as session:
    # Get all contributions from bronze
    stmt = select(BronzeFECScheduleA)
    result = session.execute(stmt)

    # Copy the attributes into plain dicts inside the session block,
    # one dict per ORM row.
    bronze_data = [
        {column: getattr(row, column) for column in BRONZE_COLUMNS}
        for row in result.scalars()
    ]

    # Explicit columns keep the schema stable even when no rows came back.
    bronze_df = pd.DataFrame(bronze_data, columns=BRONZE_COLUMNS)

print(f"Loaded {len(bronze_df)} bronze records")
print(f"\nColumns: {bronze_df.columns.tolist()}")
print(f"\nSample:")
print(bronze_df.head(2)[['sub_id', 'contributor_name', 'contribution_receipt_amount', 'committee_id']])

In [None]:
# Cell 3: Transform Bronze → Silver
# Runs the bronze -> silver transformer on `bronze_df` and lists the columns
# that exist only in the result.
banner = "=" * 60
print(banner)
print("Transform Bronze → Silver with Enrichment")
print(banner)

with get_db_session() as session:
    # The transformer is handed the open session and applied to the bronze
    # frame while that session is still active.
    transformer = BronzeToSilverFECTransformer(session=session)
    silver_df = transformer.transform(bronze_df)

print(f"\n✓ Transformation complete: {len(silver_df)} records")
print(f"\nNew columns added:")

# Columns present in silver but absent from bronze, kept in silver's order.
bronze_column_names = set(bronze_df.columns)
new_cols = [
    name
    for name in silver_df.columns
    if name not in bronze_column_names
]
print(new_cols)

In [None]:
# Cell 4: Verify Committee Enrichment
# Shows the first five rows of the committee columns and reports how many
# silver rows have a non-null committee_name.
print("="*60)
print("Verify Committee Enrichment")
print("="*60)

enriched_sample = silver_df[['sub_id', 'committee_id', 'committee_name', 'committee_type', 'committee_party']].head(5)
print(enriched_sample.to_string())

# Check how many got enriched
total = len(silver_df)
with_committee_name = silver_df['committee_name'].notna().sum()

# Guard the empty-frame case: the count is a NumPy integer, so dividing it by
# a zero total would yield nan (and a RuntimeWarning) instead of a percentage.
committee_pct = (with_committee_name / total * 100) if total else 0.0

print(f"\n✓ Enrichment stats:")
print(f"  Total contributions: {total}")
print(f"  With committee name: {with_committee_name} ({committee_pct:.1f}%)")

In [None]:
# Cell 5: Verify Candidate Enrichment
# Shows the first five rows of the candidate columns and reports how many
# silver rows have a non-null candidate_name.
print("="*60)
print("Verify Candidate Enrichment")
print("="*60)

enriched_sample = silver_df[['sub_id', 'committee_id', 'candidate_id', 'candidate_name', 'candidate_office', 'candidate_party']].head(5)
print(enriched_sample.to_string())

# Check how many got enriched.
# The total is computed here rather than reused from the committee cell, so
# this cell gives a correct count even if that cell was skipped or run
# against an older silver_df.
total = len(silver_df)
with_candidate_name = silver_df['candidate_name'].notna().sum()

# Guard the empty-frame case: the count is a NumPy integer, so dividing it by
# a zero total would yield nan (and a RuntimeWarning) instead of a percentage.
candidate_pct = (with_candidate_name / total * 100) if total else 0.0

print(f"\n✓ Candidate enrichment stats:")
print(f"  Total contributions: {total}")
print(f"  With candidate name: {with_candidate_name} ({candidate_pct:.1f}%)")

In [None]:
# Cell 6: Compare OLD vs NEW approach
# Prints every enriched field of the first silver row that has a
# candidate_name.
print("="*60)
print("OLD (JSON parsing) vs NEW (JOIN) Comparison")
print("="*60)

# Rows where candidate enrichment produced a name.
candidate_enriched = silver_df[silver_df['candidate_name'].notna()]

if candidate_enriched.empty:
    # .iloc[0] on an empty frame raises IndexError; report the situation
    # instead, and do not claim success when there is nothing to show.
    print("No contributions with a candidate name were found - nothing to display.")
else:
    # Show a record with all enriched fields
    sample = candidate_enriched.iloc[0]

    print("Sample enriched contribution:")
    print(f"  Contributor: {sample['contributor_name']}")
    print(f"  Amount: ${sample['contribution_amount']}")
    print(f"  Date: {sample['contribution_date']}")
    print(f"\n  Committee ID: {sample['committee_id']}")
    print(f"  Committee Name: {sample['committee_name']}")
    print(f"  Committee Type: {sample['committee_type']}")
    print(f"  Committee Party: {sample['committee_party']}")
    print(f"\n  Candidate ID: {sample['candidate_id']}")
    print(f"  Candidate Name: {sample['candidate_name']}")
    print(f"  Candidate Office: {sample['candidate_office']}")
    print(f"  Candidate Party: {sample['candidate_party']}")

    print(f"\n✓ NEW approach successfully JOINs with bronze reference tables!")