# Multi-image in-context learning
Seed Isaac with one annotated cat exemplar (labeled `classA`) and one annotated dog exemplar (labeled `classB`), then detect the matching class in a new frame.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ericpence/perceptron_repo/blob/main/cookbook/recipes/capabilities/multi-image-in-context-learning/multi-image-in-context-learning.ipynb)

In [None]:
%pip install --upgrade perceptron pillow --quiet

## Configure the SDK and resolve assets

In [None]:
from PIL import Image, ImageDraw, ImageFont

from cookbook.utils import cookbook_asset
from perceptron import annotate_image, bbox, configure, detect

# configure() reads PERCEPTRON_API_KEY from the environment.
configure(
    provider="perceptron",
    # model="isaac-0.1",  # Enable once the SDK supports the model argument.
)

# Resolve the two exemplar images and the frame to run detection on.
CAT_IMAGE = cookbook_asset("in-context-learning", "multi", "classA.jpg")
DOG_IMAGE = cookbook_asset("in-context-learning", "multi", "classB.webp")
TARGET_IMAGE = cookbook_asset("in-context-learning", "multi", "cat_dog_input.png")

# Fail fast on the first asset that is not on disk, before later cells try to use it.
for path in (CAT_IMAGE, DOG_IMAGE, TARGET_IMAGE):
    if path.exists():
        continue
    raise FileNotFoundError(f"Missing asset: {path}")

## Build the exemplar shots

In [None]:
# Each exemplar pairs one image with a single labeled box for its class.
# NOTE(review): the box coordinates are presumably on the same 0-1000 grid that the
# preview cell assumes when it rescales detections -- confirm against the SDK docs.
cat_example = annotate_image(
    str(CAT_IMAGE),
    {"classA": [bbox(316, 136, 703, 906, mention="classA")]},
)

dog_example = annotate_image(
    str(DOG_IMAGE),
    {"classB": [bbox(161, 48, 666, 980, mention="classB")]},
)

## Detect the target frame

In [None]:
# Run detection on the target frame for both classes, passing the two annotated
# exemplars as in-context examples.
result = detect(
    str(TARGET_IMAGE),
    classes=["classA", "classB"],
    examples=[cat_example, dog_example],
)

# Show the raw text of the response first, then each returned box.
print(result.text)
# `or []` normalizes a falsy `points` value to an empty list so that this loop and
# the later preview cell can iterate `boxes` unconditionally.
boxes = result.points or []
for box in boxes:
    print(box)

## Preview the annotated output

In [None]:
# Load the target frame as RGB and create a drawing context bound to it;
# everything drawn through `draw` modifies `img` in place.
img = Image.open(TARGET_IMAGE).convert("RGB")
draw = ImageDraw.Draw(img)
# Try a 20 pt Arial for the labels; if the font file cannot be loaded (OSError),
# fall back to Pillow's default font.
try:
    font = ImageFont.truetype("arial.ttf", size=20)
except OSError:
    font = ImageFont.load_default()


def to_px(point):
    """Return ``point`` as an ``(x, y)`` pixel tuple on the notebook-level ``img``.

    The point's ``x`` and ``y`` are treated as thousandths of the image width and
    height respectively, so ``(1000, 1000)`` maps to ``(img.width, img.height)``.
    """
    x_px = point.x / 1000 * img.width
    y_px = point.y / 1000 * img.height
    return x_px, y_px


# Overlay every detected box, plus its class label when one is available.
for box in boxes:
    top_left = to_px(box.top_left)
    bottom_right = to_px(box.bottom_right)
    draw.rectangle([top_left, bottom_right], outline="lime", width=3)
    # Prefer the box's mention and fall back to a `label` attribute if the object
    # has one. Both can be empty, and draw.text() cannot render None, so an
    # unlabeled box gets its rectangle only instead of aborting the whole preview.
    label = box.mention or getattr(box, "label", None)
    if label:
        # Put the text 20 px above the box, clamped so it never leaves the top edge.
        text_position = (top_left[0], max(top_left[1] - 20, 0))
        draw.text(text_position, label, fill="lime", font=font)

# Save the overlay next to the source asset as "<stem>_annotated.png" and show it inline.
annotated_path = TARGET_IMAGE.with_name(f"{TARGET_IMAGE.stem}_annotated.png")
img.save(annotated_path)
print(f"Saved annotated target to {annotated_path}")
display(img)

## Conclusion & next steps
- Add more exemplar shots (or additional classes) to make tougher distinctions.
- Vary the prompt or classes to localize other objects across multiple example images.
- Pair this workflow with the single-image ICL or detection notebooks to compare approaches.