# Tutorial — Isaac 0.1 Frame-by-Frame
Apply the Isaac 0.1 model to consecutive frames from `surf.mp4`, then render bounding boxes back onto each frame for review.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ericpence/perceptron_repo/blob/main/cookbook/recipes/tutorials/isaac_0.1_frame_by_frame/isaac_0.1_frame_by_frame.ipynb)

## Install dependencies
Install the Perceptron SDK, OpenCV for video decoding and encoding, Pillow for drawing annotations on frames, and tqdm for progress bars.

In [None]:
%pip install --upgrade perceptron opencv-python pillow tqdm --quiet

## Configure the Perceptron client
Load your API key from the environment (or inline) and configure the SDK once for the entire notebook.

In [None]:
from pathlib import Path

import cv2
from IPython.display import Image as IPyImage
from IPython.display import display
from PIL import Image, ImageDraw
from tqdm import tqdm

from cookbook.utils import cookbook_asset
from perceptron import configure, image, perceive, text

# configure() reads PERCEPTRON_API_KEY from the environment.
configure(
    provider="perceptron",
    # model="isaac-0.1",  # Enable once the SDK supports the model argument.
)

# Input clip (resolved through cookbook_asset) and the working/output
# locations shared by the later cells.
VIDEO_PATH = cookbook_asset("tutorials", "isaac_0.1_frame_by_frame", "surf.mp4")
FRAMES_DIR = Path("frames")
ANNOTATIONS_DIR = Path("frames_annotated")
OUTPUT_VIDEO = Path("surf_annotated.mp4")

# Fail fast with a clear message if the sample video is not present.
if not VIDEO_PATH.exists():
    raise FileNotFoundError(f"Missing asset: {VIDEO_PATH}")

# exist_ok=True lets the notebook be re-run without errors; note that files
# left in these directories by an earlier run are not removed.
FRAMES_DIR.mkdir(exist_ok=True)
ANNOTATIONS_DIR.mkdir(exist_ok=True)

## Extract frames from the MP4
Decode the source video into individual frames so we can run the SDK on each image.

In [None]:
def extract_frames(video_path: Path, dest_dir: Path, stride: int = 1) -> list[Path]:
    """Decode a video and save every ``stride``-th frame as a JPEG.

    Frames are written to ``dest_dir`` as ``frame_<index>.jpg``, where the
    zero-padded index is the frame's position in the source video (with
    ``stride=3`` the files are ``frame_00000.jpg``, ``frame_00003.jpg``, ...).

    Args:
        video_path: Video file to decode.
        dest_dir: Directory that receives the JPEGs; created if missing.
        stride: Keep one frame out of every ``stride``; must be at least 1.

    Returns:
        Paths of the frames written, in decode order.

    Raises:
        ValueError: If ``stride`` is less than 1.
        RuntimeError: If the video cannot be opened or a frame cannot be
            written.
    """
    # Validate before touching the video so a bad stride fails immediately
    # (the modulo below would otherwise raise ZeroDivisionError mid-decode).
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    dest_dir.mkdir(parents=True, exist_ok=True)

    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        cap.release()
        raise RuntimeError(f"Failed to open video: {video_path}")

    frame_paths: list[Path] = []
    idx = 0
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            if idx % stride == 0:
                frame_path = dest_dir / f"frame_{idx:05d}.jpg"
                # cv2.imwrite signals failure through its return value, so
                # check it rather than recording a path that was never written.
                if not cv2.imwrite(str(frame_path), frame):
                    raise RuntimeError(f"Failed to write frame: {frame_path}")
                frame_paths.append(frame_path)
            idx += 1
    finally:
        # Release the capture even if decoding or writing fails part-way.
        cap.release()
    print(f"Extracted {len(frame_paths)} frames (stride={stride})")
    return frame_paths


# Keep every third frame of the source clip; later cells iterate FRAME_PATHS.
FRAME_PATHS = extract_frames(VIDEO_PATH, FRAMES_DIR, stride=3)
# Stop here if decoding produced nothing, since the cells below index FRAME_PATHS[0].
if not FRAME_PATHS:
    raise RuntimeError("No frames extracted; check the video and stride settings.")

# Preview the first frame
first_frame = FRAME_PATHS[0]
display(IPyImage(filename=str(first_frame)))

## Detect surfers on each frame
Call `detect_surfers` in a loop to localize surfers and surfboards frame-by-frame, then scale the normalized (0–1000) boxes to pixel coordinates and draw them onto each frame.

In [None]:
# Object classes to localize. PROMPT is derived from this list, so editing
# TARGET_CLASSES changes what the model is asked to find.
TARGET_CLASSES = ["surfer", "surfboard"]
PROMPT = (
    f"Find every {' and '.join(TARGET_CLASSES)} in the frame. "
    "Return one bounding box per item."
)


# Request builder for a single frame. The decorator is configured for box
# outputs with multiple results allowed; callers read `.points` and `.text`
# from the value this returns.
@perceive(expects="box", allow_multiple=True)
def detect_surfers(frame_path):
    # The request is the frame image followed by the shared text PROMPT.
    return image(frame_path) + text(PROMPT)


def _to_pixels(point, width, height):
    """Scale a point to pixel coordinates for an image of the given size.

    Coordinates are divided by 1000, i.e. the SDK's values are treated as
    normalized to a 0-1000 range on each axis.
    """
    return point.x / 1000 * width, point.y / 1000 * height


# One summary record per processed frame: file name, box count, raw model text.
all_detections = []

for frame_path in tqdm(FRAME_PATHS, desc="Detecting frames"):
    result = detect_surfers(str(frame_path))
    # `or []` keeps the loop working when the result carries no boxes.
    boxes = result.points or []
    all_detections.append(
        {
            "frame": frame_path.name,
            "boxes_count": len(boxes),
            "text": result.text,
        }
    )

    # Open inside a context manager so the file handle is closed right away;
    # convert() returns an independent in-memory copy that we draw on.
    with Image.open(frame_path) as source:
        img = source.convert("RGB")
    draw = ImageDraw.Draw(img)

    for box in boxes:
        x0, y0 = _to_pixels(box.top_left, img.width, img.height)
        x1, y1 = _to_pixels(box.bottom_right, img.width, img.height)
        # Order the corners so a box reported with swapped corners still draws
        # instead of making ImageDraw.rectangle raise.
        left, right = sorted((x0, x1))
        top, bottom = sorted((y0, y1))
        draw.rectangle([(left, top), (right, bottom)], outline="lime", width=3)
        label = box.mention or getattr(box, "label", None) or "surfer"
        # Place the label just above the box, clamped to stay inside the image.
        draw.text((left, max(top - 18, 0)), label, fill="lime")

    # Reuse the source file name so annotated frames sort in the same order.
    output_path = ANNOTATIONS_DIR / frame_path.name
    img.save(output_path)

print(f"Annotated {len(FRAME_PATHS)} frames")

## Rebuild the annotated video
Stitch the saved frames back into an MP4 for quick review.

In [None]:
def stitch_video(frame_dir: Path, output_path: Path, fps: int = 30) -> None:
    """Encode the ``frame_*.jpg`` files in a directory into an MP4.

    Frames are written in sorted file-name order using the ``mp4v`` codec.
    The output size is taken from the first frame; any frame with a
    different size is resized to match, so it is not left out of the video.

    Args:
        frame_dir: Directory containing ``frame_*.jpg`` images.
        output_path: Destination video file.
        fps: Playback rate of the output video; must be positive.

    Raises:
        ValueError: If ``fps`` is not positive.
        RuntimeError: If no frames are found, a frame cannot be read, or the
            video writer cannot be opened.
    """
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps}")

    frames = sorted(frame_dir.glob("frame_*.jpg"))
    if not frames:
        raise RuntimeError("No annotated frames found to stitch.")

    # cv2.imread returns None (it does not raise) for unreadable files.
    sample = cv2.imread(str(frames[0]))
    if sample is None:
        raise RuntimeError(f"Failed to read frame: {frames[0]}")
    height, width = sample.shape[:2]

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
    if not writer.isOpened():
        writer.release()
        raise RuntimeError(f"Failed to open video writer: {output_path}")

    try:
        for frame_path in frames:
            frame = cv2.imread(str(frame_path))
            if frame is None:
                raise RuntimeError(f"Failed to read frame: {frame_path}")
            # Normalize the size up front so every frame matches the
            # dimensions the writer was opened with.
            if frame.shape[:2] != (height, width):
                frame = cv2.resize(frame, (width, height))
            writer.write(frame)
    finally:
        # Always finalize the container, even if a frame fails part-way.
        writer.release()
    print(f"Saved annotated video to {output_path}")


# Encode the annotated frames, then show the first annotated frame as a quick check.
# NOTE(review): frames were sampled with stride=3, so fps=10 presumably gives
# real-time playback for a 30 fps source — confirm against the clip's frame rate.
stitch_video(ANNOTATIONS_DIR, OUTPUT_VIDEO, fps=10)
display(IPyImage(filename=str(ANNOTATIONS_DIR / FRAME_PATHS[0].name)))

## Conclusion & next steps
- Adjust `TARGET_CLASSES` and `PROMPT` to match your use case.
- Tune the `stride` and FPS values for longer videos.
- Persist `all_detections` to JSON or a database for further analysis.