# Computer vision

## Capture and save all frames from a video with OpenCV

Run `pip install opencv-python` to install OpenCV. The cell below records a short video from your default webcam and saves it to the project directory as `files/test.MOV`:

In [1]:
import cv2
import os
import time

# Create the "files/frames" folder (and any missing parents); no-op if it already exists
os.makedirs("files/frames", exist_ok=True)

# Take a short video from webcam and save to files/test.MOV
cap = cv2.VideoCapture(0)  # Open the default camera
out = None  # Created only once the camera is known to work; lets cleanup skip it otherwise

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the default camera")

    # Frames passed to VideoWriter.write() must have the same size the writer
    # was opened with, so ask the camera for its real frame size instead of
    # assuming 640x480 (kept as the fallback when the backend reports 0).
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 640
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 480

    fourcc = cv2.VideoWriter_fourcc(*'XVID')  # Define the codec
    out = cv2.VideoWriter('files/test.MOV', fourcc, 20.0, (frame_width, frame_height))
    if not out.isOpened():
        raise RuntimeError("Could not open files/test.MOV for writing")

    # Record for 5 seconds (or until 'q' is pressed)
    duration = 5  # Recording duration in seconds
    start_time = time.monotonic()  # Monotonic clock: unaffected by system clock changes

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Check if we've exceeded the duration
        if time.monotonic() - start_time > duration:
            break

        out.write(frame)
        cv2.imshow('frame', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to exit early
            break
finally:
    # Always free the camera, the writer and the preview window, even if
    # recording failed part-way through.
    cap.release()  # Release the camera
    if out is not None:
        out.release()  # Release the video writer
    cv2.destroyAllWindows()  # Close all OpenCV windows


You can then read the video file frame by frame and save every frame to the `files/frames` folder:

In [2]:
import cv2
import os

# Open video file
cap = cv2.VideoCapture("files/test.MOV")
if not cap.isOpened():
    raise RuntimeError("Could not open files/test.MOV")

# This cell may be run on its own, so make sure the output folder exists
os.makedirs("files/frames", exist_ok=True)

# Frame counter (only counts frames that were actually written)
count = 0

try:
    while True:
        ret, frame = cap.read()

        # ret is False once there are no more frames to read
        if not ret:
            break

        # Save frame as PNG image
        filename = f"files/frames/frame_{count:04d}.png"  # Pad with zeros (e.g., frame_0001.png)
        # imwrite returns False when the image could not be saved; stop rather
        # than report frames that are not on disk.
        if not cv2.imwrite(filename, frame):
            raise RuntimeError(f"Could not write {filename}")

        count += 1
finally:
    # Release the video file even if writing a frame failed
    cap.release()

print(f"Saved {count} frames to files/frames")

Saved 133 frames to files/frames


Alternatively, here's how to capture and save a single frame from your webcam:

In [3]:
import cv2
import os

# Capture a frame from the camera
cap = cv2.VideoCapture(0)  # Open the default camera
try:
    ret, frame = cap.read()  # Read a frame from the camera
finally:
    cap.release()            # Release the camera, even if the read raised

if not ret or frame is None:
    # Nothing was captured, so there is nothing to save
    print("Failed to load image")
else:
    # Specify the folder name where you want to save the frame
    folder_name = 'files/frames'

    # Create the folder (and any missing parents) if it doesn't exist yet
    os.makedirs(folder_name, exist_ok=True)

    # Specify the path and name of the file you want to save
    # For example, saving the image as frame1.png in the frames folder
    file_name = os.path.join(folder_name, 'frame1.png')

    # Save the frame as a .png image; imwrite returns False when saving fails
    if cv2.imwrite(file_name, frame):
        print(f'Frame saved as {file_name}')
    else:
        print(f'Failed to save frame as {file_name}')

Frame saved as files/frames/frame1.png


## Moondream AI, a small vision model that runs anywhere

See [moondream.ai](https://moondream.ai) or the [GitHub repo](https://github.com/vikhyat/moondream) for more information.

In [6]:
import os
import moondream as md
from PIL import Image
import gzip
import shutil

import subprocess  # Used below to run wget without a shell and to detect a failed download

# Download URLs for the available Moondream model variants, keyed by variant name
model_links = {
    "moondream-2b-int8": "https://huggingface.co/vikhyatk/moondream2/resolve/9dddae84d54db4ac56fe37817aeaeb502ed083e2/moondream-2b-int8.mf.gz",
    "moondream-2b-int4": "https://huggingface.co/vikhyatk/moondream2/resolve/9dddae84d54db4ac56fe37817aeaeb502ed083e2/moondream-2b-int4.mf.gz",
    "moondream-0_5b-int8": "https://huggingface.co/vikhyatk/moondream2/resolve/9dddae84d54db4ac56fe37817aeaeb502ed083e2/moondream-0_5b-int8.mf.gz",
    "moondream-0_5b-int4": "https://huggingface.co/vikhyatk/moondream2/resolve/9dddae84d54db4ac56fe37817aeaeb502ed083e2/moondream-0_5b-int4.mf.gz",
}

model_to_use = "moondream-0_5b-int8"

model_path = f"./files/{model_to_use}.bin"        # Decompressed model file
archive_path = f"./files/{model_to_use}.mf.gz"    # Temporary gzipped download
partial_path = f"{model_path}.part"               # Decompression target until complete

# Download and decompress the model if it doesn't exist
if not os.path.exists(model_path):
    os.makedirs("./files", exist_ok=True)
    try:
        # Download the gzipped file; check=True raises if wget exits with an error
        subprocess.run(
            ["wget", model_links[model_to_use], "-O", archive_path],
            check=True,
        )

        # Decompress into a temporary name first so that an interrupted run
        # never leaves a truncated .bin that would be mistaken for the model.
        with gzip.open(archive_path, 'rb') as f_in, open(partial_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        os.replace(partial_path, model_path)
    finally:
        # Clean up the gzipped file and any incomplete decompression output
        for leftover in (archive_path, partial_path):
            if os.path.exists(leftover):
                os.remove(leftover)

# Initialize the downloaded model only once per session: re-running this cell
# reuses the existing `model`. Delete `model` first after changing model_to_use.
if 'model' not in locals():
    model = md.vl(model=model_path)

# Process the image; the context manager closes the image file afterwards
with Image.open("./files/frames/frame1.png") as image:
    encoded = model.encode_image(image)

# Query the image
result = model.query(encoded, "On a scale from 1 to 10, how hot is the person in this image?")
print("Answer: ", result["answer"])

Answer:   The image is not on a scale, so it's not possible to determine a temperature.
