# Angel Composition Prompt Tuning

Interactive notebook for tuning Ollama prompts for angel drawings.

**Workflow:** Edit prompts/params → re-run generation cell → see results inline in seconds.

In [None]:
# Cell 1: Setup — imports and connectivity checks
%matplotlib inline
import matplotlib.pyplot as plt

from helpers import (
    Composition, AiComposition, ai_to_composition, compositions_to_few_shot,
    call_ollama, COMPOSITION_SCHEMA, OLLAMA_SYSTEM_PROMPT, FOCUSED_SYSTEM_PROMPT,
    get_curated, get_curated_words, save_compositions, get_connection,
    validate, bounding_box, count_strokes, count_points,
    draw, draw_grid, draw_comparison,
)
from helpers.ollama import check_connection, build_few_shot_messages, DEFAULT_URL, DEFAULT_MODEL
from helpers.models import parse_ollama_response
from helpers.validate import score_breakdown

# Report the result of check_connection() followed by the default model and URL,
# one item per line.
print(
    check_connection(),
    f"Model: {DEFAULT_MODEL}",
    f"URL: {DEFAULT_URL}",
    sep="\n",
)

In [None]:
# Cell 2: Fetch the curated compositions for the subject (at most 50 requested)
SUBJECT = "angel"
curated = get_curated(SUBJECT, limit=50)
print(f"Loaded {len(curated)} curated {SUBJECT} compositions")

# Preview: the first ten entries laid out five per row
fig = draw_grid(
    curated[:10],
    cols=5,
    title=f"Top 10 Curated '{SUBJECT}' Compositions",
)
plt.show()

In [None]:
# Cell 3: Prompt configuration — EDIT THESE to experiment
# Later cells read the names defined here, so re-run this cell after editing.

# System prompt passed to build_few_shot_messages
system_prompt = FOCUSED_SYSTEM_PROMPT

# Number of compositions requested in each call
PER_CALL = 10

# Number of curated examples supplied as few-shot context
FEW_SHOT_COUNT = 10

# Generation parameters forwarded to call_ollama
TEMPERATURE = 0.1
TOP_P = 0.9
REPEAT_PENALTY = 1.1
NUM_PREDICT = 16384

# Model to use (defaults to the helper's DEFAULT_MODEL)
MODEL = DEFAULT_MODEL

# Turn the leading curated examples into (user prompt, assistant response)
# pairs, chunk_size examples per pair.
few_shot_comps = curated[:FEW_SHOT_COUNT]
few_shot_pairs = []
chunk_size = 2
for i in range(0, len(few_shot_comps), chunk_size):
    chunk = few_shot_comps[i:i + chunk_size]
    # The prompt pluralizes "variation" only when the chunk holds more than one example
    user_prompt = f"Draw {len(chunk)} distinct variation{'s' if len(chunk) > 1 else ''} of: {SUBJECT}"
    few_shot_pairs.append((user_prompt, compositions_to_few_shot(SUBJECT, chunk)))
    print(f"User prompt: {user_prompt}")

# Summary of what was built and the settings in effect
print(f"Built {len(few_shot_pairs)} few-shot pairs from {len(few_shot_comps)} curated examples")
print(f"Model: {MODEL}, Temperature: {TEMPERATURE}, Per call: {PER_CALL}")

In [None]:
# Cell 4: Generate — single Ollama API call
import json
import time

# Assemble the few-shot conversation and report its size before sending
messages = build_few_shot_messages(SUBJECT, PER_CALL, few_shot_pairs, system_prompt)
print(f"Sending {len(messages)} messages ({sum(len(m['content']) for m in messages):,} chars total)")

# Time only the request itself
t0 = time.time()
raw_response = call_ollama(
    messages,
    model=MODEL,
    schema=COMPOSITION_SCHEMA,
    temperature=TEMPERATURE,
    top_p=TOP_P,
    repeat_penalty=REPEAT_PENALTY,
    num_predict=NUM_PREDICT,
)
elapsed = time.time() - t0

# Build AiComposition objects from the "compositions" entries of the response
# (an absent key yields an empty list), then pass each through ai_to_composition.
ai_comps = list(map(AiComposition.from_dict, raw_response.get("compositions", [])))
generated = [
    ai_to_composition(ac, generation_method=f"notebook-{MODEL}")
    for ac in ai_comps
]

# Per-composition summary: validity flag, score, stroke and point counts
print(f"Generated {len(generated)} compositions in {elapsed:.1f}s")
for i, comp in enumerate(generated):
    is_valid, score = validate(comp)
    print(f"  [{i}] valid={is_valid}, score={score:.4f}, strokes={count_strokes(comp)}, points={count_points(comp)}")

In [None]:
# Cell 5: Show the generated compositions in a grid (at most five columns)
if not generated:
    print("No compositions generated.")
else:
    fig = draw_grid(
        generated,
        cols=min(len(generated), 5),
        title=f"Generated '{SUBJECT}' Compositions",
    )
    plt.show()

In [None]:
# Cell 6: Compare the first five curated compositions with the first five generated ones
# Nothing is drawn when no compositions were generated.
if generated:
    fig = draw_comparison(
        curated[:5],
        generated[:5],
        cols=5,
        title=f"'{SUBJECT}': Curated (top) vs Generated (bottom)",
    )
    plt.show()

In [None]:
# Cell 7: Print every entry of score_breakdown() for each generated composition
# map() is lazy, so each breakdown is computed right before its section is printed.
for i, breakdown in enumerate(map(score_breakdown, generated)):
    print(f"\n=== Composition {i} ===")
    for key, val in breakdown.items():
        print(f"  {key}: {val}")

In [None]:
# Cell 8: Parameter sweep — loop over temperatures
# For each temperature: run one generation call, validate the results, and record
# the compositions, the scores of the valid ones, their average, and the call time.
# A bar chart of average score per temperature is drawn at the end.
import time

temperatures = [0.1, 0.3, 0.5, 0.7, 0.9]
sweep_results = {}

for temp in temperatures:
    msgs = build_few_shot_messages(SUBJECT, PER_CALL, few_shot_pairs, system_prompt)
    # perf_counter() is monotonic, so the measured interval is unaffected by
    # system clock adjustments during a long-running call.
    t0 = time.perf_counter()
    try:
        resp = call_ollama(
            msgs, model=MODEL, schema=COMPOSITION_SCHEMA,
            temperature=temp, top_p=TOP_P,
            repeat_penalty=REPEAT_PENALTY, num_predict=NUM_PREDICT,
        )
        elapsed = time.perf_counter() - t0
        ais = [AiComposition.from_dict(c) for c in resp.get("compositions", [])]
        comps = [ai_to_composition(ac) for ac in ais]
        # validate() returns (is_valid, score); call it once per composition and
        # keep the score only when the composition is valid.
        scores = [score for is_valid, score in map(validate, comps) if is_valid]
        sweep_results[temp] = {
            "compositions": comps,
            "scores": scores,
            # Average over valid compositions only; 0 when none were valid
            "avg_score": sum(scores) / len(scores) if scores else 0,
            "valid_count": len(scores),
            "elapsed": elapsed,
        }
        print(f"  temp={temp}: {len(scores)} valid, avg_score={sweep_results[temp]['avg_score']:.4f}, {elapsed:.1f}s")
    except Exception as e:
        # Deliberately broad: one failing temperature must not abort the whole sweep.
        # An empty placeholder keeps later lookups by temperature working.
        print(f"  temp={temp}: ERROR — {e}")
        sweep_results[temp] = {"compositions": [], "scores": [], "avg_score": 0, "valid_count": 0, "elapsed": 0}

# Plot temperature vs quality, skipping temperatures with no valid scores
temps_ok = [t for t in temperatures if sweep_results[t]["scores"]]
if temps_ok:
    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    ax.bar([str(t) for t in temps_ok], [sweep_results[t]["avg_score"] for t in temps_ok], color="#2196F3")
    ax.set_xlabel("Temperature")
    ax.set_ylabel("Avg Quality Score")
    ax.set_title(f"Temperature Sweep — '{SUBJECT}'")
    ax.set_ylim(0, 1.0)
    plt.tight_layout()
    plt.show()

In [None]:
# Cell 9: Draw one grid per swept temperature (first five compositions each)
for temp in temperatures:
    result = sweep_results.get(temp)
    # Skip temperatures that were never recorded or produced no compositions
    if not result or not result["compositions"]:
        continue
    fig = draw_grid(
        result["compositions"][:5],
        cols=5,
        title=f"temp={temp}  avg_q={result['avg_score']:.3f}",
        figsize_per_cell=2.5,
    )
    plt.show()

In [None]:
# Cell 10: Save good results to database
# Only run this cell when you're happy with the generated output!

# Keep only the compositions whose validate() flag (first tuple element) is truthy
to_save = list(filter(lambda c: validate(c)[0], generated))
print(f"{len(to_save)} valid compositions ready to save")

# Uncomment the next two lines to actually save:
# saved = save_compositions(SUBJECT, to_save, generation_method=f"notebook-{MODEL}")
# print(f"Saved {saved} compositions to database")

In [None]:
# Cell 11: Explore other available subjects
# Prints every word returned by get_curated_words() together with the number of
# compositions get_curated() returns for it when called with limit=1000.
try:
    words = get_curated_words()
    print(f"{len(words)} words with curated data:")
    for w in words:
        # One get_curated() call per word; the count is the length of its result
        curated_count = len(get_curated(w, limit=1000))
        print(f"  {w}: {curated_count} curated compositions")
except Exception as e:
    # NOTE(review): any exception raised above is reported as a DB connection
    # problem — confirm that is intended.
    print(f"Could not connect to DB: {e}")

## Tips for Prompt Tuning

1. **Start with Cell 3** — change parameters, re-run Cell 3 so the new values and few-shot pairs take effect, then re-run cells 4-6
2. **Temperature** — lower (0.1-0.3) = more consistent, higher (0.5-0.9) = more varied
3. **Few-shot count** — more examples = better quality but slower generation
4. **System prompt** — edit directly in Cell 3 or modify `helpers/ollama.py`
5. **Use Cell 8** to find optimal temperature before committing to saves
6. **Cell 10** saves to DB — only use when quality is consistently good