# OpenRouter Notebook Converter

**Week 4 Exercise** by Mugisha Caleb Didier

The course notebooks use direct API clients for each provider (OpenAI, Anthropic, Google, xAI),
requiring separate API keys. This tool uses an LLM to automatically convert any notebook
to use **OpenRouter** -- one key for all providers.

Same Week 4 pattern (LLM as code transformer), different target: direct-API Python to OpenRouter Python.

In [None]:
import os
import json
import re
import copy
import shutil
from pathlib import Path
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
from model_map import MODEL_MAP

In [None]:
# Load variables from a .env file; override=True lets .env values replace ones already in the environment.
load_dotenv(override=True)

# The single key this tool needs -- every provider is reached through OpenRouter.
openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
if openrouter_api_key:
    # Print only a short prefix as a sanity check that the key was picked up.
    print(f"OpenRouter API Key exists and begins {openrouter_api_key[:8]}")
else:
    print("OpenRouter API Key not set -- add OPENROUTER_API_KEY to your .env")

# The OpenAI client is pointed at OpenRouter's endpoint via base_url.
# NOTE(review): the client is constructed even when the key is missing -- confirm that is intended.
client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=openrouter_api_key)
# Model that performs the conversion (passed as `model=` in convert_cells_batch).
MODEL = "anthropic/claude-opus-4-6"

## Core Logic

Key design decision: cells are **interdependent** (key loading, client setup, model dicts
all reference each other), so we group all convertible cells into one batch and send
them to the LLM together with explicit `# ===CELL N===` markers. The LLM returns the
full batch converted, and we split it back into individual cells.

In [None]:
def read_notebook(path):
    """Read the UTF-8 encoded notebook at *path* and return its parsed JSON."""
    with open(path, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def get_cell_source(cell):
    """Return the cell's source as a single string.

    The 'source' entry may be a list of line fragments or an already-joined
    string; a missing entry yields the empty string.
    """
    raw = cell.get('source', [])
    if isinstance(raw, list):
        return ''.join(raw)
    return raw

def set_cell_source(cell, new_source):
    """Store *new_source* on the cell, keeping the existing storage shape.

    If the cell currently holds its source as a list (or has no source at
    all), the text is split into lines with their line endings kept;
    otherwise it is stored as a plain string.
    """
    stored_as_lines = isinstance(cell.get('source', []), list)
    cell['source'] = new_source.splitlines(keepends=True) if stored_as_lines else new_source

def write_notebook(notebook, dest_path):
    """Write a notebook dict to disk as UTF-8 JSON.

    The notebook is serialized to a string *before* the destination is
    opened, so a serialization error cannot leave a truncated file behind.
    A trailing newline is written to match the on-disk format produced by
    Jupyter, which keeps version-control diffs free of end-of-file noise.

    Args:
        notebook: JSON-serializable notebook dict.
        dest_path: Path of the file to create or overwrite.

    Raises:
        TypeError: if *notebook* contains values json cannot serialize.
    """
    serialized = json.dumps(notebook, indent=1, ensure_ascii=False)
    with open(dest_path, 'w', encoding='utf-8') as f:
        f.write(serialized)
        f.write('\n')

In [None]:
# Patterns that indicate direct provider API usage.
# Each entry is a regular expression; needs_conversion() applies them with
# re.search, so a single match anywhere in a cell flags it for conversion.
CONVERSION_PATTERNS = [
    # Direct provider clients and URLs
    # (the lookbehind rejects names that merely end in "OpenAI", e.g. AsyncOpenAI())
    r'(?<!\w)OpenAI\(\)',
    r'api\.anthropic\.com',
    r'generativelanguage\.googleapis\.com',
    r'api\.x\.ai',
    r'api\.groq\.com',
    # Native SDK imports
    r'from anthropic import',
    r'from google import genai',
    # Per-provider API key loading
    r"os\.getenv\(['\"]OPENAI_API_KEY['\"]\)",
    r"os\.getenv\(['\"]ANTHROPIC_API_KEY['\"]\)",
    r"os\.getenv\(['\"]GOOGLE_API_KEY['\"]\)",
    r"os\.getenv\(['\"]GROK_API_KEY['\"]\)",
    r"os\.getenv\(['\"]GROQ_API_KEY['\"]\)",
    r"os\.environ\[['\"]OPENAI_API_KEY['\"]\]",
    # Client variable references in dicts ({"gpt-5": openai, ...})
    r':\s*openai\b',
    r':\s*anthropic\b',
    r':\s*gemini\b',
    r':\s*grok\b',
    r':\s*groq\b',
    # Provider variables used as client objects (openai.chat.completions...)
    r'\bopenai\.',
    r'\banthropic\.',
    r'\bgemini\.',
    r'\bgrok\.',
    r'\bgroq\.',
    # Provider variables passed as function arguments
    r'[\(,]\s*openai\b',
    r'[\(,]\s*anthropic\b',
    r'[\(,]\s*gemini\b',
    r'[\(,]\s*grok\b',
    r'[\(,]\s*groq\b',
    # Unprefixed model name strings that need provider prefix
    # (a quoted string that starts directly with one of these model-family names)
    r"""['"](?:gpt-|claude-|gemini-|grok-|deepseek-)[^'"]*['"]""",
]

# Hard skip -- these cells should never be touched.
# needs_conversion() checks these first, so a skip match wins over any
# conversion pattern found in the same cell.
SKIP_PATTERNS = [
    r'fine_tuning',
    r'images\.generate',
    r'audio\.speech',
]


def needs_conversion(cell_source):
    """Decide whether a code cell should be sent for conversion.

    A cell matching any entry of SKIP_PATTERNS is never converted, even if
    it also contains direct provider usage. Otherwise the cell qualifies
    as soon as one entry of CONVERSION_PATTERNS is found in it.

    Returns:
        True if the cell should be converted, False otherwise.
    """
    if any(re.search(skip, cell_source) for skip in SKIP_PATTERNS):
        return False
    return any(re.search(wanted, cell_source) for wanted in CONVERSION_PATTERNS)


def scan_notebook(notebook):
    """Collect the code cells of *notebook* that need conversion.

    Returns:
        A list of dicts, one per matching cell, holding the cell's position
        in notebook['cells'] under 'index' and its joined text under
        'source'. Non-code cells and blank cells are ignored.
    """
    found = []
    for position, cell in enumerate(notebook['cells']):
        if cell['cell_type'] == 'code':
            text = get_cell_source(cell)
            if text.strip() and needs_conversion(text):
                found.append({'index': position, 'source': text})
    return found

In [None]:
# Template for the separator line placed before each cell in the batch;
# the placeholder receives the cell's position within the batch.
CELL_MARKER = '# ===CELL {}==='

# Build a readable map string for injection into the prompt
map_str = '\n'.join(f'   "{k}" -> "{v}"' for k, v in MODEL_MAP.items())

# System prompt for the conversion call. This is an f-string: {map_str} is
# interpolated, and the doubled braces in rule 4 render as literal { }.
SYSTEM_PROMPT = f"""You are a code conversion specialist. You will receive multiple Python code
cells separated by markers like `# ===CELL 0===`. Convert ALL cells to use OpenRouter.

RULES:
1. Replace ALL direct provider clients with a SINGLE OpenRouter client:
   `client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=os.getenv('OPENROUTER_API_KEY'))`
2. Replace ALL separate API key variables (openai_api_key, anthropic_api_key, google_api_key,
   grok_api_key, groq_api_key) with one: openrouter_api_key = os.getenv('OPENROUTER_API_KEY')
3. Use this MODEL_MAP for exact model name conversions:
{map_str}
   For any model NOT in this map:
   - "gpt-*" -> "openai/gpt-*"
   - "claude-*" -> "anthropic/claude-*"
   - "gemini-*" -> "google/gemini-*"
   - "grok-*" -> "x-ai/grok-*"
   - "deepseek-*" -> "deepseek/deepseek-*"
   - Already prefixed (contains "/") -> leave as-is
4. Replace ALL references to per-provider client variables (openai, anthropic, gemini,
   grok, groq) with the single `client` variable. This includes:
   - Client dictionaries: {{"gpt-5": openai, ...}} -> {{"openai/gpt-5": client, ...}}
   - Method calls: openai.chat.completions.create(...) -> client.chat.completions.create(...)
   - Function arguments: port(openai, MODEL, code) -> port(client, MODEL, code)
5. REMOVE Ollama/localhost models from model lists and client dicts entirely.
   Local model names (llama3.2, qwen2.5-coder, gpt-oss:20b, deepseek-r1:1.5b)
   are Ollama tags, not valid OpenRouter IDs. Drop them -- OpenRouter can't
   serve local models. Also remove any `ollama = OpenAI(base_url="http://localhost:...")` client setup.
6. PRESERVE all other logic, comments, function definitions, and structure
7. Keep the EXACT same `# ===CELL N===` markers in your output so I can split cells back
8. Fix incorrect provider prefixes in already-prefixed model names:
   - "xai/" -> "x-ai/"
   - "gemini/" -> "google/"

Respond ONLY with the converted code cells, keeping the markers. No explanations, no markdown fences."""

In [None]:
def convert_cells_batch(matches):
    """Convert all matched cells in one LLM call.

    Groups cells together so the LLM sees the full picture:
    key loading + client setup + model dicts as one unit.

    Args:
        matches: List of dicts, each with a 'source' string (the shape
            produced by scan_notebook). Only 'source' is read here.

    Returns:
        Dict mapping a cell's position in *matches* to its converted code.
        Positions whose marker is missing from the reply are absent, so the
        caller can report them as failed. An empty dict is returned when
        *matches* is empty or the model returns no usable content.
    """
    # Nothing to convert: skip the (billable) API round-trip entirely.
    if not matches:
        return {}

    parts = []
    for i, m in enumerate(matches):
        parts.append(CELL_MARKER.format(i))
        parts.append(m['source'])
    batch = '\n'.join(parts)

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Convert these code cells to use OpenRouter:\n\n{batch}"}
        ]
    )

    # The reply may carry no choices, or a choice whose content is None
    # (e.g. a refusal or an upstream error); treat both as "nothing converted"
    # instead of crashing on .strip().
    choices = response.choices or []
    content = choices[0].message.content if choices else None
    if not content:
        return {}

    result = content.strip()
    # Strip markdown fences
    result = re.sub(r'^```(?:python)?\s*\n?|```\s*$', '', result).strip()

    # Split back into individual cells using the markers. With one capture
    # group, re.split yields [preamble, idx, code, idx, code, ...], so the
    # odd slots are marker numbers and the even slots after them the code.
    pieces = re.split(r'# ===CELL (\d+)===\n?', result)
    converted = {}
    for raw_idx, code in zip(pieces[1::2], pieces[2::2]):
        cell_idx = int(raw_idx)
        # Ignore marker numbers the model invented beyond the batch size.
        if cell_idx < len(matches):
            converted[cell_idx] = code.strip()

    return converted

In [None]:
def convert_notebook(input_path):
    """Produce an OpenRouter version of the notebook at *input_path*.

    Nothing is written to disk here; the caller decides whether and when
    to save the result.

    Returns:
        A tuple (summary_str, modified_notebook, diffs). When no cell needs
        conversion, modified_notebook is None and diffs is empty. Otherwise
        diffs holds one dict per successfully converted cell with the keys
        'cell', 'before' and 'after'; cells whose marker was missing from
        the LLM output are only mentioned in the summary.
    """
    source_path = Path(input_path)
    original_nb = read_notebook(source_path)
    modified = copy.deepcopy(original_nb)
    matches = scan_notebook(original_nb)

    if not matches:
        return "No cells need conversion.", None, []

    converted_by_position = convert_cells_batch(matches)

    report = [f"Converting {len(matches)} cells in {source_path.name}:"]
    diffs = []

    for position, match in enumerate(matches):
        cell_number = match['index']

        if position not in converted_by_position:
            report.append(f"\n--- Cell {cell_number}: FAILED (marker not found in LLM output) ---")
            continue

        before = match['source']
        after = converted_by_position[position]

        # Replace the code and clear stale outputs from the previous run.
        target = modified['cells'][cell_number]
        set_cell_source(target, after)
        target['outputs'] = []
        target['execution_count'] = None

        diffs.append({'cell': cell_number, 'before': before, 'after': after})
        report.extend([
            f"\n--- Cell {cell_number} ---",
            f"BEFORE:\n{before[:200]}",
            f"AFTER:\n{after[:200]}",
        ])

    return '\n'.join(report), modified, diffs

## Gradio UI

Pick a course notebook from the dropdown, **Scan** to preview which cells will change,
**Convert** to see the full BEFORE/AFTER diff, then **Apply** to overwrite the original
(a `.bak` backup is created automatically).

In [None]:
# Directory the notebook is run from (the current working directory).
NOTEBOOK_DIR = Path.cwd()
# Three levels above NOTEBOOK_DIR; discover_notebooks() looks for week*/ folders here.
# NOTE(review): assumes this notebook sits three directories below the course root -- confirm.
PROJECT_ROOT = (NOTEBOOK_DIR / "../../../").resolve()


def _week_sort_key(week_dir):
    """Order week directories numerically (week2 before week10), then by name."""
    match = re.search(r'\d+', week_dir.name)
    number = int(match.group()) if match else float('inf')
    return (number, week_dir.name)


def discover_notebooks(root=None):
    """Scan <root>/week*/ for course notebooks.

    Only `*.ipynb` files directly inside a week directory are listed. The
    search is not recursive, so notebooks in subfolders are left out, and
    backups named `<name>.ipynb.bak` do not match the `*.ipynb` glob.

    Args:
        root: Directory containing the week* folders. Defaults to
            PROJECT_ROOT when omitted.

    Returns:
        A list of (label, absolute_path) tuples, where label is the path
        relative to the root. Entries are ordered by week number, then by
        filename.
    """
    base = PROJECT_ROOT if root is None else Path(root).resolve()
    # glob("week*") can also match plain files; keep directories only.
    week_dirs = [d for d in base.glob("week*") if d.is_dir()]

    results = []
    for week_dir in sorted(week_dirs, key=_week_sort_key):
        for nb in sorted(week_dir.glob("*.ipynb")):
            results.append((str(nb.relative_to(base)), str(nb)))
    return results


def on_notebook_select(path):
    """React to a dropdown change.

    Returns the text for the info line followed by two button updates
    (Scan, Convert); both buttons are enabled only when a notebook is
    selected.
    """
    has_selection = bool(path)
    info = f"`{path}`" if has_selection else "Select a notebook above."
    return info, gr.update(interactive=has_selection), gr.update(interactive=has_selection)


def do_scan(path):
    """Preview which cells of the selected notebook would be converted.

    Returns a markdown string: a prompt to pick a notebook, a note that
    nothing needs converting, or a list of matching cells, each shown with
    up to the first 300 characters of its source.
    """
    if not path:
        return "Select a notebook first."

    nb_path = Path(path)
    found = scan_notebook(read_notebook(nb_path))
    if not found:
        return f"No cells in `{nb_path.name}` need conversion."

    report = [f"Found **{len(found)}** cells to convert in `{nb_path.name}`:\n"]
    for hit in found:
        report += [
            f"**Cell {hit['index']}**",
            f"```python\n{hit['source'][:300]}\n```\n",
        ]
    return '\n'.join(report)


def do_convert(path):
    """Run the conversion and prepare the UI outputs.

    Args:
        path: Path of the selected notebook (falsy when nothing is selected).

    Returns:
        A 4-tuple (text, converted_notebook, path, apply_button_update).
        The Apply button is enabled only when at least one cell was
        actually converted; otherwise the notebook and path are None and
        the text is the message/summary from convert_notebook. Cells that
        failed to convert are listed at the top of the diff text so a
        partial conversion is never applied unnoticed.
    """
    if not path:
        return "Select a notebook first.", None, None, gr.update(interactive=False)

    summary, modified, diffs = convert_notebook(path)

    # No matching cells, or every cell failed: show the summary (which names
    # the failures) and keep Apply disabled -- there is nothing to write.
    if modified is None or not diffs:
        return summary, None, None, gr.update(interactive=False)

    # Carry per-cell failure notices over from the summary; they are not
    # part of `diffs` and would otherwise be invisible in the UI.
    failure_notes = [
        line for line in summary.splitlines()
        if line.startswith('--- Cell ')
        and line.endswith('FAILED (marker not found in LLM output) ---')
    ]

    diff_lines = list(failure_notes)
    for d in diffs:
        diff_lines.append(f"--- Cell {d['cell']} BEFORE ---\n{d['before']}")
        diff_lines.append(f"--- Cell {d['cell']} AFTER  ---\n{d['after']}")
    return '\n\n'.join(diff_lines), modified, path, gr.update(interactive=True)


def do_apply(converted_state, path_state):
    """Overwrite the original notebook with the converted one, after a backup.

    The backup is never written over an existing backup: if
    `<name>.bak` already exists, `<name>.bak1`, `<name>.bak2`, ... is used
    instead. This matters because pressing Apply twice would otherwise
    replace the pristine backup with the already-converted file.

    Args:
        converted_state: Converted notebook dict, or None if Convert has
            not produced one.
        path_state: Path of the notebook that was converted.

    Returns:
        A one-line status message for the UI.
    """
    # Both pieces of state are set together by Convert; without either
    # there is nothing to write (and Path(None) would raise).
    if converted_state is None or not path_state:
        return "Nothing to apply -- run Convert first."
    p = Path(path_state)
    if not p.exists():
        return f"File not found: {p}"

    # Pick the first backup name that is still free.
    backup_path = p.with_name(p.name + '.bak')
    attempt = 1
    while backup_path.exists():
        backup_path = p.with_name(f"{p.name}.bak{attempt}")
        attempt += 1

    shutil.copy2(p, backup_path)
    write_notebook(converted_state, p)
    return f"Done -- backup at {backup_path}"

In [None]:
# (label, path) pairs for the dropdown, collected once when this cell runs.
notebook_choices = discover_notebooks()

with gr.Blocks(title="OpenRouter Notebook Converter", theme=gr.themes.Soft()) as ui:
    gr.Markdown("# OpenRouter Notebook Converter\nConvert any course notebook to use OpenRouter with a single API key.")

    # State carried from Convert to Apply: the converted notebook dict and
    # the path it was produced from.
    converted_nb = gr.State(value=None)
    original_path = gr.State(value=None)

    notebook_input = gr.Dropdown(
        choices=notebook_choices,
        label="Select a course notebook",
        interactive=True,
    )
    file_info = gr.Markdown("Select a notebook above.")

    # Scan and Convert start disabled; on_notebook_select enables them.
    with gr.Row():
        scan_btn = gr.Button("1. Scan", variant="secondary", interactive=False)
        convert_btn = gr.Button("2. Convert", variant="primary", interactive=False)

    scan_output = gr.Markdown()
    convert_output = gr.Textbox(label="Conversion diff (before / after)", lines=18, show_copy_button=True)

    # Apply starts disabled; do_convert returns the update that enables it.
    apply_btn = gr.Button("3. Apply changes to original file", variant="stop", interactive=False)
    apply_output = gr.Textbox(label="Result", lines=1, interactive=False)

    # Event wiring: each handler's return values map positionally onto `outputs`.
    notebook_input.change(on_notebook_select, inputs=notebook_input, outputs=[file_info, scan_btn, convert_btn])
    scan_btn.click(do_scan, inputs=notebook_input, outputs=scan_output)
    convert_btn.click(do_convert, inputs=notebook_input, outputs=[convert_output, converted_nb, original_path, apply_btn])
    apply_btn.click(do_apply, inputs=[converted_nb, original_path], outputs=apply_output)

ui.launch()

## Notes

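**What it converts:** bare `OpenAI()` clients, Anthropic/Google/xAI/Groq provider URLs,
native SDK imports, per-provider API key loading, references to per-provider client
variables (in dictionaries, method calls, and function arguments), and unprefixed model
names such as `"gpt-..."` or `"claude-..."`.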

**What it skips:** fine-tuning, DALL-E, TTS (not supported on OpenRouter).

**Always review the output** before running -- LLM-generated code should be verified.