# EU Parliament - MEP Declarations Ingestion

**Data Source:** https://www.europarl.europa.eu/meps/en/declarations

## CLI Commands
```bash
mcli run eu-parliament run      # Run full ingestion
mcli run eu-parliament status   # Check status
mcli run eu-parliament list     # List downloaded declarations
```

In [None]:
import asyncio
import json
import logging
import sys
from datetime import datetime
from functools import wraps
from pathlib import Path
from typing import Optional

import click

# The notebook is expected to live two directories below the repository root.
project_root = Path.cwd().parent.parent
# Make the project's `src` directory importable. The membership test must use
# the exact string that gets inserted; otherwise every re-run of this cell
# prepends another duplicate entry to sys.path.
_src_dir = str(project_root / 'src')
if _src_dir not in sys.path:
    sys.path.insert(0, _src_dir)

from politician_trading.config import WorkflowConfig
from politician_trading.scrapers.scrapers import EUParliamentScraper

# Configure root logging at INFO level (basicConfig does nothing if the root
# logger already has handlers attached).
logging.basicConfig(level=logging.INFO)
# Logger named after the current module, for messages emitted from this file.
logger = logging.getLogger(__name__)

In [None]:
# Root command group: the subcommands in this file register themselves on it
# through @eu_parliament.command(...). The docstring is kept verbatim because
# click displays it as the group's help text.
@click.group(name="eu-parliament")
def eu_parliament():
    """EU Parliament MEP declarations ingestion."""

def click_async(f):
    """Adapt the coroutine function ``f`` into a plain synchronous callable.

    The returned wrapper forwards all positional and keyword arguments to
    ``f``, runs the resulting awaitable to completion with ``asyncio.run``
    and returns its result. ``functools.wraps`` copies ``f``'s metadata
    (name, docstring, ...) onto the wrapper.
    """
    def run_sync(*args, **kwargs):
        pending = f(*args, **kwargs)
        return asyncio.run(pending)

    return wraps(f)(run_sync)

# Default workflow configuration; its `scraping` section is what gets handed
# to the scraper.
config = WorkflowConfig.default()
scraping_config = config.scraping

# Directory for downloaded declaration files; created on first use, including
# any missing parent directories.
OUTPUT_DIR = project_root.joinpath('data', 'raw', 'eu_parliament')
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
@eu_parliament.command(name="run")
@click.option('--output', default=None, help='Output file path')
@click_async
async def run_ingestion(output: Optional[str]):
    """Run EU Parliament MEP declarations ingestion."""
    # NOTE: the docstring above is shown by click as the command's help text,
    # so implementation notes are kept in comments.
    #
    # Flow: scrape the declarations, print the five most frequent countries,
    # then write all records plus summary metadata to a JSON file.
    # `output` overrides the default destination
    # OUTPUT_DIR/eu_parliament_<YYYYMMDD>.json.
    click.echo("Starting EU Parliament ingestion...")

    scraper = EUParliamentScraper(scraping_config)
    async with scraper:
        disclosures = await scraper.scrape_mep_declarations()

    if not disclosures:
        click.echo("No declarations found.", err=True)
        return

    click.echo(f"Fetched {len(disclosures)} declarations")

    # Tally declarations per country; records lacking the key count as 'Unknown'.
    countries = {}
    for d in disclosures:
        country = d.raw_data.get('country', 'Unknown')
        countries[country] = countries.get(country, 0) + 1

    click.echo("\nBy country:")
    top_countries = sorted(countries.items(), key=lambda x: x[1], reverse=True)[:5]
    for country, count in top_countries:
        click.echo(f"  {country}: {count}")

    # Take one timestamp so the file name and the recorded download time agree
    # even when the run straddles midnight.
    now = datetime.now()
    if output:
        output_file = Path(output)
    else:
        output_file = OUTPUT_DIR / f'eu_parliament_{now.strftime("%Y%m%d")}.json'
    # A user-supplied --output may point into a directory that does not exist yet.
    output_file.parent.mkdir(parents=True, exist_ok=True)

    serializable = [
        {
            'asset_name': d.asset_name,
            'asset_type': d.asset_type,
            'transaction_type': d.transaction_type.value if d.transaction_type else None,
            'disclosure_date': d.disclosure_date.isoformat() if d.disclosure_date else None,
            # Compare against None explicitly: an amount of 0 is a real value
            # and must not be stored as null.
            'amount_min': float(d.amount_range_min) if d.amount_range_min is not None else None,
            'source_url': d.source_url,
            'raw_data': d.raw_data,
        }
        for d in disclosures
    ]

    # Explicit encoding keeps the file independent of the platform default.
    # default=str stringifies any value json cannot encode natively.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump({
            'metadata': {
                'source': 'eu_parliament',
                'downloaded_at': now.isoformat(),
                'total_records': len(disclosures),
                'by_country': countries,
            },
            'disclosures': serializable
        }, f, indent=2, default=str)
    click.echo(f"\nSaved to {output_file}")

In [None]:
@eu_parliament.command(name="status")
def check_status():
    """Check status of EU Parliament ingestion."""
    # Summarises the most recently modified eu_parliament_*.json in OUTPUT_DIR.
    candidates = list(OUTPUT_DIR.glob('eu_parliament_*.json'))
    if not candidates:
        click.echo("No data found.")
        return

    def modified_at(path):
        return path.stat().st_mtime

    newest = max(candidates, key=modified_at)
    with open(newest) as handle:
        payload = json.load(handle)

    # Summary values come from the 'metadata' section of the file.
    meta = payload.get('metadata', {})
    click.echo(f"Latest: {newest.name}")
    click.echo(f"Records: {meta.get('total_records', 'Unknown')}")
    click.echo(f"Countries: {len(meta.get('by_country', {}))}")

In [None]:
def _raw_field(disclosure: dict, key: str, default: str):
    """Return ``raw_data[key]`` of a saved disclosure, or ``default`` when the
    ``raw_data`` section or the value itself is missing or null."""
    raw = disclosure.get('raw_data') or {}
    value = raw.get(key)
    return default if value is None else value


@eu_parliament.command(name="list")
@click.option('--limit', default=10, type=int, help='Number of records')
@click.option('--country', default=None, help='Filter by country')
def list_declarations(limit: int, country: Optional[str]):
    """List downloaded EU Parliament declarations."""
    # NOTE: the docstring above is shown by click as the command's help text,
    # so implementation notes are kept in comments.
    #
    # Reads the most recently modified eu_parliament_*.json in OUTPUT_DIR and
    # prints up to `limit` records, optionally keeping only those whose
    # country contains `country` (case-insensitive substring match).
    files = list(OUTPUT_DIR.glob('eu_parliament_*.json'))
    if not files:
        click.echo("No data found.")
        return
    latest = max(files, key=lambda p: p.stat().st_mtime)
    with open(latest, encoding='utf-8') as f:
        data = json.load(f)
    disclosures = data.get('disclosures') or []

    if country:
        needle = country.lower()
        # Saved records can carry null for country or raw_data, so fall back
        # to '' instead of calling .lower() on None.
        disclosures = [
            d for d in disclosures
            if needle in _raw_field(d, 'country', '').lower()
        ]

    # A negative limit would slice from the end and report a negative count.
    limit = max(limit, 0)
    click.echo(f"Showing {min(limit, len(disclosures))} of {len(disclosures)}:\n")
    for d in disclosures[:limit]:
        mep = _raw_field(d, 'politician_name', 'Unknown')
        country_name = _raw_field(d, 'country', 'Unknown')
        # The 'asset_name' key is always written by the run command, possibly
        # as null, so a .get() default alone does not protect the slice.
        asset = d.get('asset_name')
        if asset is None:
            asset = 'N/A'
        click.echo(f"  {mep} ({country_name}): {asset[:40]}")