# FOIA-Free Content Pipeline — Colab Runner

Run the full pipeline from Google Colab.  
Set your API keys in step 3 below, then run the remaining cells in order, one stage at a time.

In [None]:
# 1. Install dependencies
# The leading "!" hands each line to the shell (IPython/Colab syntax), so this
# cell is not plain Python and only runs inside a notebook.
# -q keeps pip's output quiet.
!pip install -q pyyaml python-dotenv openai requests feedparser beautifulsoup4 yt-dlp
# System package: -qq minimises apt output, -y answers prompts automatically.
# ffmpeg is presumably needed by the render stage — TODO confirm.
!apt-get -qq install -y ffmpeg

In [None]:
# 2. Clone or mount repo
# Uncomment exactly one of the options below so that the working directory
# becomes the repository root; later cells import from the `scripts` package.
import os

# Option A: Clone from GitHub
# !git clone https://github.com/YOUR_USER/NEWS--VIEWS.git
# os.chdir('NEWS--VIEWS')

# Option B: Mount Google Drive
# from google.colab import drive
# drive.mount('/content/drive')
# os.chdir('/content/drive/MyDrive/NEWS--VIEWS')

# Report where we ended up so a wrong directory is noticed immediately.
print(f'Working directory: {os.getcwd()}')

In [None]:
# 3. Set API keys
import os

# REQUIRED: paste your API keys between the quotes.
# A value left blank does NOT overwrite a key that is already present in the
# environment, so re-running this cell cannot wipe a working key.
API_KEYS = {
    'OPENROUTER_API_KEY': '',  # Your OpenRouter API key
    'YOUTUBE_API_KEY': '',     # Your YouTube Data API v3 key
    'BRAVE_API_KEY': '',       # Your Brave Search API key (https://brave.com/search/api/)
}

for name, pasted in API_KEYS.items():
    pasted = pasted.strip()  # tolerate stray whitespace from copy/paste
    # Always leave the variable defined (possibly empty), as the cell did before,
    # but prefer an existing value over a blank placeholder.
    os.environ[name] = pasted or os.environ.get(name, '')

# Check every key, not just the first one, and say which ones are missing.
missing_keys = [name for name in API_KEYS if not os.environ.get(name)]
if missing_keys:
    print(f'WARNING: Set API keys above! Missing: {", ".join(missing_keys)}')
else:
    print('API keys configured.')

In [None]:
# 4. Initialize database
from scripts.db import init_db
from scripts.config_loader import ensure_dirs, load_policy, load_sources

# Directories first, then the database — same order as always.
ensure_dirs()
init_db()

# Load both configs and keep them as notebook globals for later inspection.
policy = load_policy()
sources = load_sources()

# Count enabled feeds without building a throwaway list.
enabled_count = sum(1 for feed in sources if feed.get("enabled"))

print(f'Policy: {len(policy)} sections')
print(f'Sources: {len(sources)} feeds ({enabled_count} enabled)')

In [None]:
# 5a. INGEST — Pull new candidates (YouTube + RSS + pages)
from scripts.run_pipeline import run_ingest

# days=7 is presumably the look-back window in days — confirm against run_ingest.
# dry_run=False presumably lets the stage persist its results — TODO confirm.
ingest_results = run_ingest(days=7, dry_run=False)
# Bare expression: the notebook displays the returned value as the cell output.
ingest_results

In [None]:
# 5b. ENRICH — Add transcripts and entities
from scripts.run_pipeline import run_enrich

# limit=200 presumably caps how many candidates are processed in this run —
# confirm against run_enrich. dry_run=False presumably persists results — TODO confirm.
enrich_results = run_enrich(limit=200, dry_run=False)
# Bare expression: the notebook displays the returned value as the cell output.
enrich_results

In [None]:
# 5c. TRIAGE — Score and classify candidates
from scripts.run_pipeline import run_triage

# limit=200 presumably caps how many candidates are scored in this run —
# confirm against run_triage. dry_run=False presumably persists results — TODO confirm.
triage_results = run_triage(limit=200, dry_run=False)
# Bare expression: the notebook displays the returned value as the cell output.
triage_results

In [None]:
# 6. Review PASS candidates
# Prints a short summary of the first 20 candidates whose status is PASS.
from scripts.db import get_connection, get_candidates
import json  # not used in this cell; kept so later ad-hoc cells can rely on it

conn = get_connection()
try:
    # Fetch up to 50 PASS candidates; only the first 20 are listed below.
    pass_candidates = get_candidates(conn, status='PASS', limit=50)
    print(f'PASS candidates: {len(pass_candidates)}')
    print()

    for i, c in enumerate(pass_candidates[:20], 1):
        # Guard the title the same way the rationale is guarded, so a
        # candidate without a title does not abort the whole listing.
        title = (c["title"] or "")[:80]
        rationale = (c.get("triage_rationale") or "")[:100]
        print(f'{i:2d}. [{c["triage_score"]:3d}] {title}')
        print(f'    Type: {c["incident_type"]} | URL: {c["url"]}')
        print(f'    Reason: {rationale}')
        print()
finally:
    # Release the connection even if a lookup or a malformed row raises.
    conn.close()

In [None]:
# 7. CORROBORATE — Gather supporting sources for PASS candidates
from scripts.run_pipeline import run_corroborate

# limit=20 presumably caps how many candidates are corroborated in this run —
# confirm against run_corroborate. dry_run=False presumably persists results — TODO confirm.
corr_results = run_corroborate(limit=20, dry_run=False)
# Bare expression: the notebook displays the returned value as the cell output.
corr_results

In [None]:
# 8. PACKAGE — Build case bundles (timeline, narration, shorts plan)
from scripts.run_pipeline import run_package

# limit=5 presumably caps how many cases are packaged in this run —
# confirm against run_package. dry_run=False presumably persists results — TODO confirm.
package_results = run_package(limit=5, dry_run=False)
# Bare expression: the notebook displays the returned value as the cell output.
package_results

In [None]:
# 9. RENDER — Download, cut, caption, export
from scripts.run_pipeline import run_render

# limit=3 presumably caps how many cases are rendered in this run —
# confirm against run_render. dry_run=False presumably produces real output — TODO confirm.
render_results = run_render(limit=3, dry_run=False)
# Bare expression: the notebook displays the returned value as the cell output.
render_results

In [None]:
# 10. Pipeline stats
# Prints row counts per table, then candidate counts per triage status and
# case counts per case status.
from scripts.db import get_connection

conn = get_connection()
try:
    print('=== Pipeline Stats ===')
    # Table names cannot be bound as SQL parameters; interpolation is safe here
    # only because the names come from this fixed list, never from user input.
    for table in ['candidates', 'cases', 'corroboration_sources']:
        count = conn.execute(f'SELECT COUNT(*) FROM {table}').fetchone()[0]
        print(f'  {table}: {count} rows')

    print()
    print('Triage distribution:')
    # Every triage status is printed, including those with a zero count.
    for status in ['NEW', 'PASS', 'MAYBE', 'KILL']:
        count = conn.execute(
            'SELECT COUNT(*) FROM candidates WHERE triage_status = ?', (status,)
        ).fetchone()[0]
        print(f'  {status}: {count}')

    print()
    print('Case status distribution:')
    # Unlike the triage list, case statuses with no rows are omitted.
    for status in ['APPROVED', 'PACKAGED', 'RENDERED', 'READY_TO_PUBLISH']:
        count = conn.execute(
            'SELECT COUNT(*) FROM cases WHERE status = ?', (status,)
        ).fetchone()[0]
        if count > 0:
            print(f'  {status}: {count}')
finally:
    # Release the connection even if a query fails (e.g. a table is missing).
    conn.close()

In [None]:
# FULL PIPELINE (single command)
# Uncomment and run this to execute the entire pipeline at once:

# from scripts.run_pipeline import run_pipeline
# results = run_pipeline(days=7, dry_run=False)
# results