# GEOEventFusion — Quickstart Notebook

This notebook is the recommended entry point for running GEOEventFusion in Google Colab or a local Jupyter environment.

**This notebook is intentionally thin.** All pipeline logic lives in the `geoeventfusion` package.  
Do not copy pipeline code into this notebook — keep notebooks thin and delegate to the package.

## What this notebook does:
1. Installs dependencies
2. Configures API keys
3. Runs the full GEOEventFusion pipeline
4. Displays the storyboard and downloads artifacts

## Google Colab setup (first time only)
Before running this notebook in Colab, clone the repository into your Colab session:
```
!git clone https://github.com/<your-org>/GEOEventFusion.git /content/GEOEventFusion
```
Then open this notebook from `/content/GEOEventFusion/notebooks/quickstart.ipynb`.  
The install cell below will detect the repo location automatically.

## 1. Install Dependencies

In [None]:
import os
import subprocess
import sys
from pathlib import Path

# ── Detect runtime environment ──────────────────────────────────────────────
# IN_COLAB is True only when the google.colab module can be imported.
try:
    import google.colab  # noqa: F401
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# ── Locate the repo root ────────────────────────────────────────────────────
# Candidate directories are probed in order; the first one that contains a
# pyproject.toml wins. This keeps the cell working for a Colab clone, a Drive
# mount, or a local Jupyter session.
repo_candidates = [
    Path('/content/GEOEventFusion'),                      # Colab: cloned to /content
    Path('/content/drive/MyDrive/GEOEventFusion'),        # Colab: Drive mount
    Path(__file__).parent.parent if '__file__' in dir() else Path('.'),  # local script
    Path('..'),                                           # notebook run from notebooks/
    Path('.'),                                            # already at repo root
]

repo_root = None
for candidate in repo_candidates:
    if (candidate / 'pyproject.toml').exists():
        repo_root = candidate.resolve()
        break

if repo_root is None:
    # Nothing matched: stop here with instructions instead of failing later.
    raise RuntimeError('\n'.join([
        "GEOEventFusion repo not found.",
        "In Colab, first run:",
        "  !git clone https://github.com/<your-org>/GEOEventFusion.git /content/GEOEventFusion",
        "Then re-run this cell.",
    ]))

# Make the repo root the working directory so the editable install below
# resolves '.' to the project.
os.chdir(repo_root)
print(f'Repository root: {repo_root}')

# ── Install the package and all dependencies ────────────────────────────────
# Invoke pip through the running interpreter so the install targets this kernel.
pip_install_cmd = [sys.executable, '-m', 'pip', 'install', '-e', '.', '--quiet']
subprocess.check_call(pip_install_cmd)
print('Installation complete.')

In [None]:
import logging

# Display pipeline progress in the notebook output.
# force=True replaces any handlers already attached to the root logger, so
# re-running this cell does not stack duplicate handlers.
logging.basicConfig(
    level=logging.INFO,
    # Separator restored to a real em dash (it was mis-encoded in the format string).
    format='%(asctime)s  %(levelname)-8s  %(name)s — %(message)s',
    datefmt='%H:%M:%S',
    force=True,
)
# Reduce noise from HTTP and parsing libraries: only WARNING and above.
for _noisy in ('urllib3', 'feedparser', 'trafilatura', 'httpx', 'anthropic'):
    logging.getLogger(_noisy).setLevel(logging.WARNING)

print('Logging configured.')

## 2. Configure API Keys

**Colab:** Add your secrets via the key icon (🔑) in the left sidebar, then run this cell.  
**Local:** Create a `.env` file from `.env.example` — keys are loaded automatically.

In [None]:
import os

def _set_env_if_value(key: str, value) -> None:
    """Set an environment variable only when value is a non-empty string."""
    if value and isinstance(value, str):
        os.environ[key] = value

def _safe_colab_secret(userdata, key: str) -> str:
    """Retrieve a Colab secret, returning '' if not set or not permitted."""
    try:
        val = userdata.get(key)
        return val if val is not None else ''
    except Exception:
        return ''

# ── Colab Secrets (recommended for Colab) ───────────────────────────────────
# In Colab, copy each secret that is set into os.environ. Outside Colab the
# google.colab import fails and keys are read from a .env file instead.
try:
    from google.colab import userdata  # type: ignore[import]

    # Unset or inaccessible secrets come back as '' and are skipped.
    for _secret_name in ('ANTHROPIC_API_KEY', 'ACLED_API_KEY', 'ACLED_EMAIL', 'OLLAMA_API_KEY'):
        _set_env_if_value(_secret_name, _safe_colab_secret(userdata, _secret_name))

    print('Loaded secrets from Colab Secrets.')
    if not os.getenv('ANTHROPIC_API_KEY') and not os.getenv('OLLAMA_API_KEY'):
        # Warning glyph restored (it was mis-encoded in the original message).
        print('  ⚠  No LLM key found. Add ANTHROPIC_API_KEY to Colab Secrets, or use test_mode=True.')

except ImportError:
    # Running locally — load from .env file
    try:
        from dotenv import load_dotenv
        load_dotenv()
        print('Loaded API keys from .env file.')
    except ImportError:
        # python-dotenv is optional; fall back to whatever is already exported.
        print('python-dotenv not installed; reading keys from environment variables directly.')

# Report only whether each key is present; never print the key values.
print(f'  ANTHROPIC_API_KEY set: {bool(os.getenv("ANTHROPIC_API_KEY"))}')
print(f'  ACLED_API_KEY set:     {bool(os.getenv("ACLED_API_KEY"))}')

## 3. Configure the Pipeline

In [None]:
from config.settings import PipelineConfig

# Edit the values below to change what the pipeline analyses.
config = PipelineConfig(
    query="Houthi Red Sea attacks",       # Free-text geopolitical query
    days_back=90,                          # Look-back window in days (GDELT max: ~90)
    llm_backend="anthropic",              # Either "anthropic" or "ollama"
    max_records=250,                       # Articles per GDELT fetch (max: 250)

    # Visual intelligence is optional and slower — gated by GDELT image modes.
    enable_visual_intel=False,
    visual_imagetags=["military", "protest", "explosion"],

    # Optional ground-truth datasets (requires ACLED_API_KEY); uncomment to enable.
    # ground_truth_sources=["acled"],
    # ground_truth_country_filter=["Yemen"],

    # test_mode=True runs on fixture data with no real API calls, which is
    # handy for checking the install before any API keys are configured.
    test_mode=False,

    log_level="INFO",
)

# Echo the key settings in two aligned columns (labels padded to 17 characters).
config_summary = (
    ('Query:', config.query),
    ('LLM backend:', config.llm_backend),
    ('Analysis window:', f'{config.days_back} days'),
    ('Test mode:', config.test_mode),
)
for label, value in config_summary:
    print(f'{label:<17}{value}')

## 4. Run the Pipeline

In [None]:
from geoeventfusion.pipeline import run_pipeline

# Execute every pipeline phase in sequence.
# Intermediate results are cached to disk; re-run with the same context to resume.
context = run_pipeline(config)

# Headline facts about the finished run.
run_summary = [
    'Pipeline complete!',
    f'Run ID:           {context.run_id}',
    f'Output directory: {context.output_dir}',
]
print('\n'.join(run_summary))

# Warnings: the header shows the full count, but only the first five are listed.
if context.warnings:
    print(f'Warnings ({len(context.warnings)}):')
    for warning_msg in context.warnings[:5]:
        print(f'  {warning_msg}')

# Errors are always listed in full.
if context.errors:
    print(f'Errors ({len(context.errors)}):')
    for error_msg in context.errors:
        print(f'  {error_msg}')

## 5. View Results

In [None]:
# Display the storyboard summary: overall scores first, then one section per
# panel with at most three key events each.
if context.storyboard_result:
    sb = context.storyboard_result
    print(f'Query:              {sb.query}')
    # Scores are rendered with the '%' format spec, i.e. value * 100.
    print(f'Overall confidence: {sb.overall_confidence:.0%}')
    print(f'Escalation risk:    {sb.escalation_risk:.0%}')
    print(f'Panels:             {len(sb.panels)}')
    print()
    for panel in sb.panels:
        print(f'--- {panel.headline} ---')
        print(f'  Confidence: {panel.confidence:.0%}')
        print(f'  Key events: {len(panel.key_events)}')
        # Show only the first three events, each description cut to 80 characters.
        for event in panel.key_events[:3]:
            print(f'    [{event.date}] {event.description[:80]}')
        print()
else:
    # Em dash restored (it was mis-encoded in the original message).
    print('Storyboard not generated — check warnings/errors above.')

In [None]:
# Display the HTML storyboard report inline
from pathlib import Path
from IPython.display import HTML, display

# The report is read from the run's output directory.
html_path = Path(context.output_dir) / 'storyboard_report.html'
if html_path.exists():
    # read_text opens, decodes as UTF-8, and closes the file in one call.
    html_content = html_path.read_text(encoding='utf-8')
    display(HTML(html_content))
else:
    # Em dash restored (it was mis-encoded in the original message).
    print('HTML report not found — export may have failed. Check warnings above.')

## 6. Download Artifacts (Colab only)

In [None]:
# Download all output artifacts as individual files (triggers browser download in Colab).
# Failures are reported as a message rather than raised, so the rest of the
# notebook keeps running outside Colab.
download_problem = None
try:
    from geoeventfusion.io.colab_helpers import download_run_artifacts
    download_run_artifacts(context.output_dir)
except ImportError:
    download_problem = 'Colab download helper not available in this environment.'
except Exception as exc:
    download_problem = f'Download failed: {exc}'

if download_problem is not None:
    print(download_problem)

## 7. Inspect Individual Outputs

In [None]:
import json
from pathlib import Path

output_dir = Path(context.output_dir)

# Artifact inventory: every regular file under the output directory, in sorted
# order, shown as a path relative to output_dir with its size in KB.
print('Artifacts written:')
artifact_files = (entry for entry in sorted(output_dir.rglob('*')) if entry.is_file())
for artifact in artifact_files:
    kilobytes = artifact.stat().st_size / 1024
    print(f'  {artifact.relative_to(output_dir)}  ({kilobytes:.1f} KB)')

In [None]:
# View validation report: headline scores, then any flags raised.
validation_path = output_dir / 'validation_report.json'
if validation_path.exists():
    with open(validation_path, encoding='utf-8') as f:
        vr = json.load(f)
    # dict.get(key, 0) still returns None when the key is present with a JSON
    # null, and formatting None with ':.0%' / ':.0f' raises TypeError, so
    # coerce missing or null values to 0 (and to an empty list for flags).
    grounding_score = vr.get('grounding_score') or 0
    verification_pct = vr.get('verification_percentage') or 0
    print(f"Grounding score:  {grounding_score:.0%}")
    print(f"Verification:     {verification_pct:.0f}% of events verified")
    flags = vr.get('flags') or []
    if flags:
        print(f"Flags ({len(flags)}):")
        # Each flag is read as a mapping; absent fields fall back to placeholders.
        for flag in flags:
            print(f"  [{flag.get('severity', '?')}] {flag.get('flag_type', '?')}: {flag.get('detail', '')}")
else:
    print('validation_report.json not found.')