# Third-Party Aggregators - Financial Disclosure Ingestion

**Data Sources:** QuiverQuant, ProPublica, StockNear

## CLI Commands
```bash
mcli run third-party run            # Run all aggregators
mcli run third-party run --quiver   # QuiverQuant only
mcli run third-party run --propub   # ProPublica only
mcli run third-party status         # Check status
```

In [None]:
import asyncio
import json
import logging
import os
import sys
from datetime import datetime
from functools import wraps
from pathlib import Path
from typing import Optional

import click

# The directory two levels above the current working directory is treated as
# the project root; its ``src`` folder is put on the import path so the
# ``politician_trading`` package can be imported below.
project_root = Path.cwd().parent.parent
_src_path = str(project_root / 'src')
# Guard on the exact entry that gets inserted, so re-running this cell does
# not keep pushing duplicate entries onto sys.path.
if _src_path not in sys.path:
    sys.path.insert(0, _src_path)

from politician_trading.config import WorkflowConfig
from politician_trading.scrapers.scrapers import QuiverQuantScraper
from politician_trading.scrapers.scrapers_third_party import ThirdPartyDataFetcher

# Configure the root logger at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [None]:
# Root click group; the ``run`` and ``status`` subcommands attach to it.
# The docstring doubles as the group's --help text, so it is kept verbatim.
@click.group(name="third-party")
def third_party():
    """Third-party aggregator data ingestion."""

def click_async(f):
    """Adapt coroutine function ``f`` into a plain synchronous callable.

    The returned wrapper forwards its arguments to ``f``, drives the
    resulting coroutine to completion with ``asyncio.run`` and returns the
    coroutine's result. Metadata of ``f`` is preserved via ``functools.wraps``.
    """
    @wraps(f)
    def run_sync(*call_args, **call_kwargs):
        coroutine = f(*call_args, **call_kwargs)
        return asyncio.run(coroutine)

    return run_sync

# Shared settings: the default workflow configuration and its scraping section.
config = WorkflowConfig.default()
scraping_config = config.scraping

# Directory that receives the JSON dumps; created here if it does not exist.
OUTPUT_DIR = project_root.joinpath('data', 'raw', 'third_party')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# CLI entry point for ``third-party run``.
#
# Collects records from the selected sources, tags each record with its
# source, and writes everything plus summary metadata to a single JSON file.
#
# Options:
#   --quiver  run the QuiverQuant scraper only
#   --propub  run the ProPublica fetcher only
#   --output  destination file; defaults to
#             OUTPUT_DIR / third_party_<YYYYMMDD>.json
#
# Passing neither flag runs both sources. ProPublica is skipped (with a
# message) when PROPUBLICA_API_KEY is not set in the environment.
#
# These notes are comments rather than docstring text because click prints
# the docstring as the command's --help output.
@third_party.command(name="run")
@click.option('--quiver', is_flag=True, help='Scrape QuiverQuant only')
@click.option('--propub', is_flag=True, help='Scrape ProPublica only')
@click.option('--output', default=None, help='Output file path')
@click_async
async def run_ingestion(quiver: bool, propub: bool, output: Optional[str]):
    """Run third-party aggregator ingestion."""
    all_disclosures = []
    by_source = {}

    # If no specific flag, run all
    run_all = not quiver and not propub

    # QuiverQuant
    if quiver or run_all:
        click.echo("Scraping QuiverQuant...")
        scraper = QuiverQuantScraper(scraping_config)
        async with scraper:
            trades = await scraper.scrape_congress_trades()
        click.echo(f"  QuiverQuant: {len(trades)} trades")
        # Keys present in a trade take precedence over the 'source' tag,
        # exactly as in the unpacking order below.
        all_disclosures.extend({'source': 'quiverquant', **t} for t in trades)
        by_source['quiverquant'] = len(trades)

    # ProPublica
    if propub or run_all:
        api_key = os.getenv('PROPUBLICA_API_KEY')
        if api_key:
            click.echo("Fetching ProPublica...")
            fetcher = ThirdPartyDataFetcher(propublica_api_key=api_key)
            data = fetcher.fetch_from_propublica()
            # Look the list up once and reuse it for the count and the loop.
            disclosures = data.get('disclosures', [])
            count = len(disclosures)
            click.echo(f"  ProPublica: {count} disclosures")
            all_disclosures.extend(
                {'source': 'propublica', 'data': str(d)} for d in disclosures
            )
            by_source['propublica'] = count
        else:
            click.echo("  ProPublica: Skipped (no API key)")

    click.echo(f"\nTotal: {len(all_disclosures)} records")

    # One timestamp for both the default filename and the metadata, so the
    # two cannot disagree when a run straddles midnight.
    now = datetime.now()
    if output:
        output_file = Path(output)
    else:
        output_file = OUTPUT_DIR / f'third_party_{now.strftime("%Y%m%d")}.json'

    # A user-supplied --output may point into a directory that does not exist
    # yet; create it instead of failing after all the scraping is done.
    output_file.parent.mkdir(parents=True, exist_ok=True)

    payload = {
        'metadata': {
            'sources': list(by_source.keys()),
            'downloaded_at': now.isoformat(),
            'total_records': len(all_disclosures),
            'by_source': by_source,
        },
        'disclosures': all_disclosures,
    }
    with open(output_file, 'w', encoding='utf-8') as f:
        # default=str stringifies any value json cannot serialize natively.
        json.dump(payload, f, indent=2, default=str)
    click.echo(f"Saved to {output_file}")

In [None]:
# CLI entry point for ``third-party status``: prints the name, record count
# and per-source counts of the most recently modified dump in OUTPUT_DIR.
# The docstring is click's --help text and is kept verbatim.
@third_party.command(name="status")
def check_status():
    """Check status of third-party ingestion."""
    snapshots = list(OUTPUT_DIR.glob('third_party_*.json'))
    if not snapshots:
        click.echo("No data found.")
        return

    def modified_at(path):
        return path.stat().st_mtime

    # Pick the dump with the newest modification time.
    newest = max(snapshots, key=modified_at)
    with open(newest) as handle:
        payload = json.load(handle)

    summary = payload.get('metadata', {})
    click.echo(f"Latest: {newest.name}")
    click.echo(f"Records: {summary.get('total_records', 'Unknown')}")
    click.echo(f"Sources: {summary.get('by_source', {})}")