# GEOEventFusion — Quickstart Notebook

This notebook is the recommended entry point for running GEOEventFusion in Google Colab or a local Jupyter environment.

**This notebook is intentionally thin.** All pipeline logic lives in the `geoeventfusion` package.  
Do not copy pipeline code into this notebook — keep notebooks thin and delegate to the package.

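## What this notebook does
1. Installs dependencies
2. Configures API keys
3. Configures the pipeline
4. Runs the full GEOEventFusion pipeline
5. Displays the storyboard
6. Downloads artifacts (Colab only)
7. Lists the output artifacts and summarizes the validation report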

## 1. Install Dependencies

In [None]:
# Install GEOEventFusion and its dependencies.
# This cell needs to run once per Colab session.
import subprocess
import sys

# The editable install below targets the current working directory, so the
# repo must already be cloned or mounted there (adjust the path as needed).
# Running locally? Skip this cell and install the package yourself with:
#   pip install -e .[dev]

# Invoke pip through the running interpreter (sys.executable)
pip_command = [sys.executable, '-m', 'pip', 'install', '-e', '.', '--quiet']
subprocess.check_call(pip_command)
print('Installation complete.')

## 2. Configure API Keys

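Provide your API keys through the environment rather than pasting them into this notebook. In Colab, add them as Colab Secrets (the key icon in the left sidebar) named `ANTHROPIC_API_KEY`, `ACLED_API_KEY`, and `ACLED_EMAIL`.  
For local use, create a `.env` file from `.env.example`.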

In [None]:
import os

# ── Colab Secrets (recommended) ─────────────────────────────────────────────────
# Only a failed import means "not running in Colab". Secret-lookup errors are
# handled separately below so they are reported instead of silently swallowed.
try:
    from google.colab import userdata
except ImportError:
    userdata = None

loaded_from_colab = False
if userdata is not None:
    try:
        # userdata.get() accepts only the secret name and raises when the
        # secret is missing or the notebook has not been granted access to it.
        os.environ['ANTHROPIC_API_KEY'] = userdata.get('ANTHROPIC_API_KEY')
    except Exception as secret_error:
        print(
            'Could not read ANTHROPIC_API_KEY from Colab Secrets '
            f'({type(secret_error).__name__}: {secret_error}).'
        )
    else:
        # The ACLED credentials are optional: default to an empty string when
        # a secret cannot be read (userdata.get() has no default argument).
        for optional_key in ('ACLED_API_KEY', 'ACLED_EMAIL'):
            try:
                os.environ[optional_key] = userdata.get(optional_key)
            except Exception:
                os.environ[optional_key] = ''
        loaded_from_colab = True
        print('Loaded API keys from Colab Secrets.')

if not loaded_from_colab:
    # Running locally (or Colab Secrets unavailable) — load keys from the
    # .env file via python-dotenv
    from dotenv import load_dotenv
    load_dotenv()
    print('Loaded API keys from .env file.')

## 3. Configure the Pipeline

In [None]:
from config.settings import PipelineConfig

# Gather every tunable in one mapping, then build the config from it.
pipeline_settings = dict(
    query="Houthi Red Sea attacks",       # Your geopolitical query
    days_back=90,                          # Analysis window (GDELT max: ~90)
    llm_backend="anthropic",              # "anthropic" or "ollama"
    max_records=250,                       # Articles per GDELT fetch (max: 250)

    # Optional: enable visual intelligence (slower)
    enable_visual_intel=False,
    visual_imagetags=["military", "protest", "explosion"],

    # Optional: ground truth datasets
    # ground_truth_sources=["acled"],
    # ground_truth_country_filter=["Yemen"],

    log_level="INFO",
)
config = PipelineConfig(**pipeline_settings)

# Echo the key settings so the run parameters are visible in the output
print(f'Query: {config.query}')
print(f'LLM backend: {config.llm_backend}')
print(f'Analysis window: {config.days_back} days')

## 4. Run the Pipeline

In [None]:
from geoeventfusion.pipeline import run_pipeline

# Execute the full pipeline; its phases run sequentially.
# Intermediate results are cached to disk for resumability.
context = run_pipeline(config)

# Report where this run's outputs were written
print('Pipeline complete!')
print(f'Run ID: {context.run_id}')
print(f'Output directory: {context.output_dir}')

## 5. View Results

In [None]:
# Print a plain-text summary of the storyboard, if one is available
storyboard = context.storyboard_result
if storyboard:
    # Run-level figures
    print(f'Query: {storyboard.query}')
    print(f'Overall confidence: {storyboard.overall_confidence:.0%}')
    print(f'Escalation risk: {storyboard.escalation_risk:.0%}')
    print(f'Panels: {len(storyboard.panels)}')
    print()

    # One section per panel
    for story_panel in storyboard.panels:
        print(f'--- {story_panel.headline} ---')
        print(f'  Confidence: {story_panel.confidence:.0%}')
        print(f'  Key events: {len(story_panel.key_events)}')
        # Show at most three key events, truncating descriptions to 80 characters
        for key_event in story_panel.key_events[:3]:
            print(f'    [{key_event.date}] {key_event.description[:80]}')
        print()

In [None]:
# Render the HTML storyboard report inline in the notebook
from pathlib import Path
from IPython.display import HTML, display

html_path = Path(context.output_dir) / 'storyboard_report.html'
if not html_path.exists():
    print('HTML report not found — export may have failed. Check warnings.')
else:
    # Read the report as UTF-8 text and hand it to IPython's HTML renderer
    display(HTML(html_path.read_text(encoding='utf-8')))

## 6. Download Artifacts (Colab only)

In [None]:
# Download the run's output artifacts via the Colab helper
try:
    from geoeventfusion.io.colab_helpers import download_run_artifacts

    artifact_dir = context.output_dir
    download_run_artifacts(artifact_dir)
except ImportError:
    # Reported as the helper being unavailable rather than as a failed download
    print('Colab download helper not available in this environment.')
except Exception as download_error:
    # Best-effort step: report the failure instead of raising
    print(f'Download failed: {download_error}')

## 7. Inspect Individual Outputs

In [None]:
import json
from pathlib import Path

output_dir = Path(context.output_dir)

# List every file under the run's output directory (recursively) with its size
print('Artifacts written:')
artifact_files = sorted(path for path in output_dir.rglob('*') if path.is_file())
for artifact in artifact_files:
    size_kb = artifact.stat().st_size / 1024
    print(f'  {artifact.relative_to(output_dir)}  ({size_kb:.1f} KB)')

In [None]:
# Summarize validation_report.json from the run's output directory
validation_path = output_dir / 'validation_report.json'
if validation_path.exists():
    with open(validation_path, encoding='utf-8') as f:
        vr = json.load(f)
    # A JSON null decodes to None, which the numeric format specs below
    # reject with a TypeError; treat a missing or null metric as 0.
    grounding_score = vr.get('grounding_score') or 0
    verification_pct = vr.get('verification_percentage') or 0
    print(f"Grounding score: {grounding_score:.0%}")
    print(f"Verification: {verification_pct:.0f}% of events verified")
    flags = vr.get('flags') or []
    if flags:
        print(f"Flags ({len(flags)}):")
        for flag in flags:
            print(f"  [{flag.get('severity', '?')}] {flag.get('flag_type', '?')}: {flag.get('detail', '')}")
else:
    # Say so explicitly instead of printing nothing, as the HTML report cell does
    print('Validation report not found — it may not have been written for this run.')