# UK Parliament - Register of Interests Ingestion

**Data Source:** https://interests-api.parliament.uk/api/v1

## CLI Commands
```bash
mcli run uk-parliament run      # Run full ingestion
mcli run uk-parliament status   # Check status
mcli run uk-parliament list     # List downloaded interests
```

In [None]:
import asyncio
import json
import logging
import sys
from datetime import datetime
from functools import wraps
from pathlib import Path
from typing import Optional

import click

# The project root is taken to be two directories above the current working
# directory (NOTE(review): this depends on where the notebook is launched
# from -- confirm against the repository layout).
project_root = Path.cwd().parent.parent

# Make `<project_root>/src` importable. The membership test must check the
# exact string that gets inserted; checking `project_root` itself would add a
# duplicate entry every time this cell is re-run, and would skip the insert
# entirely whenever the root (but not `src`) is already on the path.
src_path = str(project_root / 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

from politician_trading.config import WorkflowConfig
from politician_trading.scrapers.scrapers_uk import run_uk_parliament_collection

# Module-level logger named after this module, with INFO-level root
# configuration requested via basicConfig.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [None]:
# Root click group; the `run`, `status` and `list` subcommands attach to it.
# The docstring doubles as the group's help text, so it is kept verbatim.
@click.group(name="uk-parliament")
def uk_parliament():
    """UK Parliament Register of Interests ingestion."""

def click_async(f):
    """Wrap coroutine function ``f`` so it can be called synchronously.

    The returned function forwards its arguments to ``f``, runs the resulting
    coroutine to completion with ``asyncio.run`` and returns its result.
    ``functools.wraps`` copies ``f``'s metadata onto the wrapper.
    """
    @wraps(f)
    def _blocking_call(*args, **kwargs):
        coroutine = f(*args, **kwargs)
        return asyncio.run(coroutine)

    return _blocking_call

# Directory that receives the raw JSON dumps; created eagerly (including any
# missing parents) so the commands below can glob/write without checking.
OUTPUT_DIR = project_root.joinpath('data', 'raw', 'uk_parliament')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Default workflow configuration; its `scraping` section is what the `run`
# command passes to the collector.
config = WorkflowConfig.default()
scraping_config = config.scraping

In [None]:
# `run` subcommand: fetch disclosures, print the five largest categories, and
# dump all records plus summary metadata to a JSON file. The function
# docstring is click's help text and is therefore kept short.
#
# Args:
#     output: Destination file path, or None to write
#             OUTPUT_DIR/uk_parliament_<YYYYMMDD>.json.
@uk_parliament.command(name="run")
@click.option('--output', default=None, help='Output file path')
@click_async
async def run_ingestion(output: Optional[str]):
    """Run UK Parliament interests ingestion."""
    click.echo("Starting UK Parliament ingestion...")
    disclosures = await run_uk_parliament_collection(scraping_config)

    if not disclosures:
        # Reported on stderr; nothing is written in this case.
        click.echo("No interests found.", err=True)
        return

    click.echo(f"Fetched {len(disclosures)} interests")

    # Tally records per raw_data['category_name'] (missing key -> 'Unknown').
    categories = {}
    for d in disclosures:
        cat = d.raw_data.get('category_name', 'Unknown')
        categories[cat] = categories.get(cat, 0) + 1

    click.echo("\nBy category:")
    for cat, count in sorted(categories.items(), key=lambda x: x[1], reverse=True)[:5]:
        click.echo(f"  {cat}: {count}")

    # Sample the clock once so the date in the default filename and the
    # `downloaded_at` stamp cannot disagree across a midnight boundary.
    now = datetime.now()
    if output:
        output_file = Path(output)
    else:
        output_file = OUTPUT_DIR / f'uk_parliament_{now.strftime("%Y%m%d")}.json'
    # A user-supplied --output may point into a directory that does not exist yet.
    output_file.parent.mkdir(parents=True, exist_ok=True)

    serializable = [
        {
            'id': d.id,
            'asset_name': d.asset_name,
            'asset_type': d.asset_type,
            'transaction_type': d.transaction_type.value if d.transaction_type else None,
            'disclosure_date': d.disclosure_date.isoformat() if d.disclosure_date else None,
            # Compare against None explicitly: an amount of 0 is falsy but is
            # still a real value and must not be serialized as null.
            'amount_min': float(d.amount_range_min) if d.amount_range_min is not None else None,
            'source_url': d.source_url,
            'raw_data': d.raw_data,
        }
        for d in disclosures
    ]

    with open(output_file, 'w', encoding='utf-8') as f:
        # default=str stringifies any value json cannot encode natively.
        json.dump({
            'metadata': {
                'source': 'uk_parliament',
                'downloaded_at': now.isoformat(),
                'total_records': len(disclosures),
                'by_category': categories,
            },
            'disclosures': serializable
        }, f, indent=2, default=str)
    click.echo(f"\nSaved to {output_file}")

In [None]:
# `status` subcommand: summarize the most recently modified dump in OUTPUT_DIR.
@uk_parliament.command(name="status")
def check_status():
    """Check status of UK Parliament ingestion."""
    dumps = list(OUTPUT_DIR.glob('uk_parliament_*.json'))
    if not dumps:
        click.echo("No data found.")
        return

    # "Latest" is decided by file modification time, not by the date in the name.
    latest = max(dumps, key=lambda path: path.stat().st_mtime)
    with open(latest) as handle:
        payload = json.load(handle)
    metadata = payload.get('metadata', {})

    click.echo(f"Latest: {latest.name}")
    record_count = metadata.get('total_records', 'Unknown')
    click.echo(f"Records: {record_count}")
    by_category = metadata.get('by_category', {})
    click.echo(f"Categories: {len(by_category)}")

In [None]:
# `list` subcommand: print up to `limit` records from the most recently
# modified dump in OUTPUT_DIR, optionally filtered by category.
#
# Args:
#     limit: Maximum number of records to print; negative values are treated as 0.
#     category: Case-insensitive substring matched against each record's
#               raw_data['category_name']; None disables filtering.
@uk_parliament.command(name="list")
@click.option('--limit', default=10, type=int, help='Number of records')
@click.option('--category', default=None, help='Filter by category')
def list_interests(limit: int, category: Optional[str]):
    """List downloaded UK Parliament interests."""
    files = list(OUTPUT_DIR.glob('uk_parliament_*.json'))
    if not files:
        click.echo("No data found.")
        return

    # "Latest" is decided by file modification time.
    latest = max(files, key=lambda p: p.stat().st_mtime)
    with open(latest, encoding='utf-8') as f:
        data = json.load(f)

    # Fields may be present but null in the dump (the `run` command writes
    # `asset_name` and `raw_data` through unmodified), so `.get(key, default)`
    # is not enough: fall back with `or` before calling str/dict methods.
    disclosures = data.get('disclosures') or []

    if category:
        needle = category.lower()
        disclosures = [
            d for d in disclosures
            if needle in ((d.get('raw_data') or {}).get('category_name') or '').lower()
        ]

    # A negative limit would otherwise print "Showing -N of M" and slice
    # records off the end of the list instead of showing none.
    limit = max(limit, 0)

    click.echo(f"Showing {min(limit, len(disclosures))} of {len(disclosures)}:\n")
    for d in disclosures[:limit]:
        mp = (d.get('raw_data') or {}).get('politician_name') or 'Unknown'
        asset = d.get('asset_name') or 'N/A'
        # Asset names are truncated to 50 characters to keep one record per line.
        click.echo(f"  {mp}: {asset[:50]}")