# Tutorial: Colophon Python API with /examples Data

Audience:
- Researchers, analysts, and technical writers who want to script Colophon in Python.

Prerequisites:
- A Python environment where the local `colophon` package can be imported.
- Basic familiarity with dictionaries, lists, and paths in Python.

Learning goals:
- Load Colophon artifacts from the repository's `examples/` directory using Python helpers.
- Run `ColophonPipeline` directly without CLI commands.
- Inspect diagnostics and write outputs from notebook code.
- Import Obsidian notes into the knowledge graph through the API.


## Outline

1. Setup and locate project files
2. Load bibliography, outline, graph, and prompt artifacts (Step 1)
3. Run a baseline Colophon pipeline in Python (Step 2)
4. Inspect manuscript outputs and diagnostics (Step 3)
5. Write the manuscript and diagnostics to `build/` (Step 4)
6. Add notes-derived graph links and compare results (Step 5, optional)
7. Pitfalls, extensions, and exercises


In [None]:
from __future__ import annotations

import json
from pathlib import Path

from colophon.io import (
    load_bibliography_with_format,
    load_graph,
    load_outline,
    load_prompts,
    write_text,
)
from colophon.note_import import NotesImportConfig, NotesKnowledgeGraphImporter
from colophon.pipeline import ColophonPipeline, PipelineConfig

def find_repo_root(start: Path) -> Path:
    """Return the nearest directory at or above *start* that looks like the repo root.

    A candidate qualifies when it contains both an ``examples`` directory and
    a ``colophon`` directory. *start* is checked first, then each of its
    parents in order, so the closest match wins.

    Args:
        start: Directory to begin the upward search from.

    Returns:
        The first candidate that contains both marker directories.

    Raises:
        FileNotFoundError: If neither *start* nor any of its parents qualifies.
    """
    for candidate in (start, *start.parents):
        # Require real directories for both markers: a stray file named
        # "examples" must not make an unrelated folder look like the repo root.
        if all((candidate / marker).is_dir() for marker in ("examples", "colophon")):
            return candidate
    raise FileNotFoundError("Could not find repo root containing examples/ and colophon/")

# Start the upward search from the resolved current working directory.
repo_root = find_repo_root(Path.cwd().resolve())
examples_dir = repo_root.joinpath("examples")
build_dir = repo_root.joinpath("build", "jupyter_notebook_tutorial")

# Trailing expression: the notebook shows this summary as the cell output.
dict(
    repo_root=str(repo_root),
    examples_dir_exists=examples_dir.exists(),
    build_dir=str(build_dir),
)


## Step 1 - Load artifacts from `examples/`

This cell uses Colophon I/O helpers, the same parsers used by the CLI, but called directly from Python.


In [None]:
# Every input file lives directly under examples_dir.
bibliography = load_bibliography_with_format(
    examples_dir.joinpath("bibliography.json"),
    bibliography_format="json",  # explicit hint matching the .json file
)
outline = load_outline(examples_dir.joinpath("outline.json"))
seed_graph = load_graph(examples_dir.joinpath("seed_graph.json"))
prompts = load_prompts(examples_dir.joinpath("prompts.json"))

# Size summary of what was loaded; evaluated last so the notebook displays it.
artifact_summary = dict(
    sources=len(bibliography),
    chapters=len(outline),
    graph_entities=len(seed_graph.entities),
    graph_relations=len(seed_graph.relations),
    graph_figures=len(seed_graph.figures),
    prompt_templates=sorted(prompts.keys()),
)
artifact_summary


## Step 2 - Run the Colophon pipeline (Python API only)

Now we instantiate `PipelineConfig` and `ColophonPipeline` directly and run a draft.


In [None]:
# Baseline pipeline: explicit config, using the prompt templates already loaded into `prompts`.
pipeline = ColophonPipeline(
    config=PipelineConfig(
        title="Notebook Tutorial Draft",
        prompt_templates=prompts,
        top_k=2,
        max_figures_per_section=2,
        enable_coordination_agents=True,
    )
)

manuscript = pipeline.run(bibliography=bibliography, outline=outline, graph=seed_graph)

# Headline numbers for the run; diagnostics keys that are absent show up as None.
{
    "title": manuscript.title,
    **{
        counter: manuscript.diagnostics.get(counter)
        for counter in (
            "chapters_generated",
            "sections_generated",
            "figures_available",
            "figures_attached",
        )
    },
    "gap_requests": len(manuscript.gap_requests),
}


## Step 3 - Inspect generated content and diagnostics

Keep notebook outputs compact: inspect one section preview and a few key diagnostic counts.


In [None]:
first_chapter = manuscript.chapters[0]
first_section = first_chapter.sections[0]

# Preview at most 220 characters of the opening paragraph; the ellipsis marks a cut.
section_preview = first_section.paragraphs[0].text[:220].strip()
section_preview += "..." if len(first_section.paragraphs[0].text) > 220 else ""

# Compact cell output: one text preview plus a few counts.
dict(
    chapter=first_chapter.title,
    section=first_section.title,
    paragraph_preview=section_preview,
    citation_issues=len(manuscript.diagnostics.get("citation_issues", [])),
    figure_issues=len(manuscript.diagnostics.get("figure_issues", [])),
    coordination_messages=len(manuscript.coordination_messages),
)


## Step 4 - Persist outputs from notebook code

Write manuscript and diagnostics to `build/jupyter_notebook_tutorial/` so results are versionable artifacts.


In [None]:
# exist_ok=True lets this cell be re-run without mkdir raising on the existing folder.
build_dir.mkdir(exist_ok=True, parents=True)

manuscript_path = build_dir.joinpath("tutorial_api_draft.md")
diagnostics_path = build_dir.joinpath("tutorial_api_diagnostics.json")

write_text(manuscript_path, manuscript.to_markdown())
write_text(diagnostics_path, json.dumps(manuscript.diagnostics, indent=2))

# Report repo-relative paths so the output does not depend on the checkout location.
{
    label: str(written_path.relative_to(repo_root))
    for label, written_path in (
        ("manuscript", manuscript_path),
        ("diagnostics", diagnostics_path),
    )
}


## Step 5 - Optional: import Obsidian notes into the graph

This variation uses `NotesKnowledgeGraphImporter` with `examples/notes/obsidian` and reruns the same pipeline.


In [None]:
# Obsidian platform with the hyperlink option enabled and embeddings disabled.
importer = NotesKnowledgeGraphImporter(
    config=NotesImportConfig(
        platform="obsidian",
        use_embeddings=False,
        use_hyperlinks=True,
    )
)

# The importer returns the resulting graph together with an import report.
graph_with_notes, notes_result = importer.run(
    examples_dir.joinpath("notes", "obsidian"),
    graph=seed_graph,
)

# Same settings as the baseline run apart from the title, so the two drafts are comparable.
pipeline_with_notes = ColophonPipeline(
    config=PipelineConfig(
        title="Notebook Tutorial Draft (with Notes Import)",
        prompt_templates=prompts,
        top_k=2,
        max_figures_per_section=2,
        enable_coordination_agents=True,
    )
)
manuscript_with_notes = pipeline_with_notes.run(
    graph=graph_with_notes,
    bibliography=bibliography,
    outline=outline,
)

# Import report, post-import graph size, and coordination message counts for both drafts.
dict(
    notes_import=notes_result.to_dict(),
    graph_entities_after_import=len(graph_with_notes.entities),
    graph_relations_after_import=len(graph_with_notes.relations),
    baseline_coordination_messages=len(manuscript.coordination_messages),
    notes_coordination_messages=len(manuscript_with_notes.coordination_messages),
)


## Pitfalls and extensions

Common pitfall:
- Passing the wrong format hint (for example forcing `bibliography_format="csv"` for a JSON file) raises parsing errors.

Fix:
- Use `bibliography_format="auto"`, or pass the hint that matches the file's actual format (for example `"json"` for `bibliography.json`).

Extension ideas:
- Swap in `examples/graph_edges.csv` or `examples/graph_dump.sql` to compare graph loading behavior.
- Turn off coordination (`enable_coordination_agents=False`) and compare diagnostics.


## Exercises

- Run the pipeline with `graph_edges.csv` and compare figure diagnostics to `seed_graph.json`.
- Then run with `graph_dump.sql` and inspect whether figures are available/attached.
- Before running any of the variants, predict which input will attach the fewest figures and why, then check your prediction against the results.


In [None]:
# Exercise scaffold
def run_with_graph(graph_filename: str) -> dict[str, int | str]:
    """Run the tutorial pipeline against one graph file from ``examples_dir``.

    Loads ``examples_dir / graph_filename`` with ``load_graph``, drafts a
    manuscript with coordination agents disabled, and summarizes the graph
    size and figure diagnostics so different graph inputs can be compared.

    Args:
        graph_filename: File name of the graph, relative to ``examples_dir``.

    Returns:
        A dict holding the graph file name, the entity and relation counts of
        the loaded graph, and the ``figures_available`` / ``figures_attached``
        diagnostics (``0`` when a diagnostics key is absent).
    """
    loaded_graph = load_graph(examples_dir / graph_filename)
    variant_config = PipelineConfig(
        title=f"Graph Variant: {graph_filename}",
        top_k=2,
        max_figures_per_section=2,
        prompt_templates=prompts,
        enable_coordination_agents=False,
    )
    draft = ColophonPipeline(config=variant_config).run(
        bibliography=bibliography,
        outline=outline,
        graph=loaded_graph,
    )

    summary = {
        "graph_file": graph_filename,
        "graph_entities": len(loaded_graph.entities),
        "graph_relations": len(loaded_graph.relations),
    }
    # Figure counters default to 0 so a missing diagnostics key still compares cleanly.
    for figure_key in ("figures_available", "figures_attached"):
        summary[figure_key] = draft.diagnostics.get(figure_key, 0)
    return summary

# Try this first: seed_graph.json is the reference the exercises compare the other graph files against.
run_with_graph("seed_graph.json")


In [None]:
# Exercise answer scaffold (one possible solution):
# run the scaffold once per graph file, then show the metrics keyed by file name.
seed_metrics = run_with_graph("seed_graph.json")
csv_metrics = run_with_graph("graph_edges.csv")
sql_metrics = run_with_graph("graph_dump.sql")

dict(
    zip(
        ("seed_graph.json", "graph_edges.csv", "graph_dump.sql"),
        (seed_metrics, csv_metrics, sql_metrics),
    )
)
