<a href="https://colab.research.google.com/github/alikaiser12/AI/blob/main/video_annotation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

To get the extracted images (frames) from a video using OpenCV, you need to load the video and read each frame in a loop, then save or process each frame as an image. Here's a step-by-step breakdown with a working example:

✅ Step-by-step using OpenCV
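🔹 1. Install OpenCV (if it is not already installed) with `pip install opencv-python`, then import it in Python as `cv2`.
🐍 The first cell below also uses the `ultralytics` package to preview YOLOv8 detections on a video.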

In [None]:
import cv2
from ultralytics import YOLO

# cv2.imshow is disabled in Colab; cv2_imshow renders the frame inline instead.
from google.colab.patches import cv2_imshow

video_path = "input_video.mp4"  # Replace with the path to your own video
max_frames = 30  # Inline previews are heavy; only show the first few frames

model = YOLO("yolov8n.pt")  # Load a pretrained YOLOv8n model
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    # Report the problem instead of silently skipping the loop.
    print(f"Error: Could not open video '{video_path}'.")
else:
    frame_count = 0
    while frame_count < max_frames:
        ret, frame = cap.read()
        if not ret:
            break  # End of video
        results = model(frame)  # Detect objects
        annotated_frame = results[0].plot()  # Draw bounding boxes
        cv2_imshow(annotated_frame)
        frame_count += 1

cap.release()

In [None]:
import os
from glob import glob
import re
import numpy as np
import cv2
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Mount Google Drive at /content/drive so later cells can read the video stored there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os

# Define the folder path in Google Drive (adjust according to your folder structure)
data_path = '/content/drive/MyDrive/Video'

# List the .mp4 video files in the folder (the extension check is case-insensitive).
# Note: the variable is called image_paths but it holds video paths.
image_paths = [os.path.join(data_path, file) for file in os.listdir(data_path) if file.lower().endswith('.mp4')]

# Print the list of video paths
print(image_paths)

['/content/drive/MyDrive/Video/video-2 clip-1 (30 sec).mp4']


In [None]:
import glob
import re
import os

# Define the folder path in Google Drive
data_path = '/content/drive/MyDrive/Video'

# Use glob to get all jpg files in the folder
# If a filename doesn't have digits, it will be treated as 0 for sorting.
image_paths = sorted(glob.glob(f"{data_path}/*.mp4"), key=lambda x: float(re.findall("(\d+)", x)[0]) if re.findall("(\d+)", x) else 0)

# Print the list of image paths
print(image_paths)

['/content/drive/MyDrive/Video/video-2 clip-1 (30 sec).mp4']


**Python script to detect and annotate objects in the video with YOLOv8:**

In [None]:
!pip install ultralytics
import cv2
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # Load a pretrained YOLOv8n model
cap = cv2.VideoCapture("video-2 clip-1 (30 sec).mp4")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    results = model(frame)  # Detect objects
    annotated_frame = results[0].plot()  # Draw bounding boxes
    cv2.imshow("Annotated Video", annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()



In [None]:
pip install opencv-python




**Python script to extract frames:**

In [None]:
import cv2
import os

# Create output directory (absolute, so it matches the folder the detection step reads
# even if the working directory has changed)
output_folder = "/content/frames"
os.makedirs(output_folder, exist_ok=True)

# Load video from the mounted Drive folder
video_path = "/content/drive/MyDrive/Video/video-2 clip-1 (30 sec).mp4"
cap = cv2.VideoCapture(video_path)
frame_number = 0

if not cap.isOpened():
    # Without this check a wrong path would silently extract zero frames.
    print(f"Error: Could not open video '{video_path}'.")
else:
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # No more frames
        # Save frame as image
        cv2.imwrite(os.path.join(output_folder, f"frame_{frame_number:04d}.png"), frame)
        frame_number += 1

cap.release()
print(f"Extracted {frame_number} frames to '{output_folder}'.")


In [None]:
import cv2
import os

# Create a folder to save extracted frames. The path is absolute so it always matches
# the folder passed to the detection step, whatever the current working directory is.
output_folder = "/content/frames"
os.makedirs(output_folder, exist_ok=True)

# Load the video file
video_path = "/content/drive/MyDrive/Video/video-2 clip-1 (30 sec).mp4"
cap = cv2.VideoCapture(video_path)

# Raise instead of calling exit(): exit() would shut down the notebook kernel and
# discard every variable, whereas an exception only stops this cell.
if not cap.isOpened():
    raise IOError(f"Error: Could not open video '{video_path}'.")

frame_number = 0

try:
    # Loop through video frames
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # No more frames, end of video

        # Save each frame as an image; imwrite returns False when the write fails.
        frame_filename = os.path.join(output_folder, f"frame_{frame_number:04d}.png")
        if not cv2.imwrite(frame_filename, frame):
            raise IOError(f"Error: Could not write '{frame_filename}'.")

        frame_number += 1
finally:
    # Release the video capture object even if a write failed
    cap.release()

print(f"Done! Extracted {frame_number} frames to '{output_folder}' folder.")


Done! Extracted 900 frames to 'frames' folder.


In [None]:
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
!pip install -r requirements.txt



fatal: destination path 'yolov5' already exists and is not an empty directory.
/content/yolov5


**Arguments used by the detection command in the next cell:**
--source /content/frames: Your extracted frame folder

--weights yolov5s.pt: Pretrained weights (small, fast model)

--save-txt: Saves YOLO annotations (one .txt file per image) in a `labels` subfolder of the results folder

--save-conf: Saves confidence score

--project ../output --name annotated_frames: Where results go

In [None]:
# Run YOLOv5 detection on every extracted frame in /content/frames, keeping detections
# with confidence >= 0.4 and writing results under ../output/annotated_frames.
# NOTE(review): the logged settings show exist_ok=False, so a re-run presumably writes to a
# new numbered folder (annotated_frames2, annotated_frames3, ...) — confirm which folder
# the video-building cell should read.
!python detect.py --source /content/frames --weights yolov5s.pt --conf 0.4 --save-txt --save-conf --project ../output --name annotated_frames

[34m[1mdetect: [0mweights=['yolov5s.pt'], source=/content/frames, data=data/coco128.yaml, imgsz=[640, 640], conf_thres=0.4, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_format=0, save_csv=False, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=../output, name=annotated_frames, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v7.0-416-gfe1d4d99 Python-3.11.12 torch-2.6.0+cu124 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
image 1/900 /content/frames/frame_0000.png: 384x640 (no detections), 335.6ms
image 2/900 /content/frames/frame_0001.png: 384x640 (no detections), 270.3ms
image 3/900 /content/frames/frame_0002.png: 384x640 1 car, 221.4ms
image 4/900 /content/frames/frame_0003.png: 384x640 1 car, 264.8ms
image 5/900 /content/frames/frame_0004.png: 384x640

In [None]:
import cv2
import os

output_root = "/content/output"  # Folder given to detect.py via --project
run_prefix = "annotated_frames"  # Name given to detect.py via --name
output_video = "annotated_video.mp4"
fps = 30  # Set your original video's FPS

# Each detection run may create a new numbered folder (annotated_frames, annotated_frames2, ...),
# so pick the most recently modified one instead of hard-coding a run number.
run_folders = []
if os.path.isdir(output_root):
    for entry in sorted(os.listdir(output_root)):
        candidate = os.path.join(output_root, entry)
        if entry.startswith(run_prefix) and os.path.isdir(candidate):
            run_folders.append(candidate)

# Raise instead of calling exit(): exit() would shut down the notebook kernel.
if not run_folders:
    raise FileNotFoundError(
        f"Error: No '{run_prefix}*' folder found in '{output_root}'. "
        "Make sure you have run the object detection and annotation step."
    )
image_folder = max(run_folders, key=os.path.getmtime)

# Get all files with .png extension (zero-padded names sort in frame order)
images = sorted(img for img in os.listdir(image_folder) if img.endswith(".png"))

if not images:
    raise FileNotFoundError(f"Error: No image files found in '{image_folder}'.")

# The first image defines the size of the output video
frame_path = os.path.join(image_folder, images[0])
frame = cv2.imread(frame_path)
if frame is None:
    raise IOError(f"Error: Could not read image '{frame_path}'.")
height, width = frame.shape[:2]

video = cv2.VideoWriter(output_video, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

try:
    for image_name in images:
        image = cv2.imread(os.path.join(image_folder, image_name))
        if image is None:
            # imread returns None for unreadable files; writing None would raise.
            print(f"Warning: skipping unreadable image '{image_name}'.")
            continue
        video.write(image)
finally:
    # Always finalize the file, even if a frame could not be written
    video.release()

print("✅ Annotated video saved as:", output_video)

✅ Annotated video saved as: annotated_video.mp4


✅ STEP 2: Rebuild Annotated Images into a Video (using FFmpeg or OpenCV)

✅ Step-by-Step: Get Object Direction from Video
🔹 1. Use YOLO (or any object detector) to detect and annotate objects
You've already got this from previous steps — YOLO detects objects frame by frame and gives you:

Bounding boxes (center x, y per object)

Class labels (e.g., person, car, etc.)

Optionally, confidence scores

🔹 2. Track Objects Across Frames
To detect direction, you need to know how an object moves across time, not just frame-by-frame.

Use a tracker to assign a unique ID to each object and follow it frame by frame:

🔧 Recommended Trackers:
SORT (Simple Online Realtime Tracker)

Deep SORT (adds appearance features — better tracking accuracy)

You can plug YOLO + Deep SORT using existing projects like:

https://github.com/mikel-brostrom/Yolov5_StrongSORT_OSNet

These tools will output:

```text
Frame | Object_ID | x_center | y_center | width | height | class
```

So now you know how each object moves across frames.

🔹 3. Calculate Direction Vectors
For each object ID, calculate the direction of movement using its centroid positions:

In [None]:
import math

def get_direction(prev_x, prev_y, curr_x, curr_y):
    """Return the heading of the move from the previous point to the current one.

    Args:
        prev_x, prev_y: Coordinates of the earlier position.
        curr_x, curr_y: Coordinates of the later position.

    Returns:
        The movement angle in degrees, normalized to the range [0, 360).
        0 points towards increasing x; 90 points towards decreasing y,
        which is "up" in image coordinates. Identical points give 0.
    """
    delta_x, delta_y = curr_x - prev_x, curr_y - prev_y

    # Image rows grow downwards, so the vertical component is negated to get
    # a conventional counter-clockwise angle.
    heading = math.degrees(math.atan2(-delta_y, delta_x))

    # atan2 yields (-180, 180]; shift negative headings into 0–360.
    return (heading + 360) % 360


Then map angle to compass directions:

In [None]:
def angle_to_direction(angle):
    """Map an angle in degrees to one of eight compass direction names.

    The circle is split into 45-degree sectors centred on 0 (East),
    45 (North-East), 90 (North) and so on, counter-clockwise.

    Args:
        angle: Heading in degrees; values outside 0–360 are wrapped.

    Returns:
        The name of the sector containing the angle, e.g. 'North-West'.
    """
    compass = (
        'East', 'North-East', 'North', 'North-West',
        'West', 'South-West', 'South', 'South-East',
    )
    # Shift by half a sector (22.5 degrees) so each name is centred on its heading.
    sector = int((angle + 22.5) % 360 // 45)
    return compass[sector]


🔹 4. Generate a Direction Profile
For each tracked object:

Take its first and last centroid position

Calculate movement vector and direction using the above functions

Optionally: accumulate directions to get majority movement (if noisy)

Example:

In [None]:
# Worked example: object 1 starts at (100, 250) and ends at (300, 200).
start_point = (100, 250)
end_point = (300, 200)

angle = get_direction(*start_point, *end_point)
direction = angle_to_direction(angle)
print(f"Object is moving towards: {direction}")


Object is moving towards: East


✅ Final Step: Add Direction Labels to Annotated Frames (Optional)
You can annotate direction on the frames using OpenCV:

Example (Python):

In [None]:
import cv2
import os

# cv2.imshow is disabled in Colab; cv2_imshow renders the image inline instead.
from google.colab.patches import cv2_imshow

# 'x' and 'y' are the coordinates where the text is placed.
# Replace these with the actual x and y coordinates of the object's center.
x = 100  # Example value for x
y = 200  # Example value for y

# `frame` and `direction` come from earlier cells. Draw on a copy so the original
# frame is left untouched and re-running the cell does not stack labels.
labeled_frame = frame.copy()
cv2.putText(labeled_frame, f"Dir: {direction}", (x, y - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

# Show the labelled image instead of echoing the raw pixel array as cell output.
cv2_imshow(labeled_frame)

array([[[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       ...,

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]]], dtype=uint8)

In [None]:
import cv2
import numpy as np


In [None]:
# Open the annotated video for reading. The next cell opens the same file again,
# so this handle is replaced before it is used.
cap = cv2.VideoCapture('annotated_video.mp4')  # Replace with 0 for webcam


In [None]:
import cv2
import numpy as np
from google.colab.patches import cv2_imshow # Import the cv2_imshow function

cap = cv2.VideoCapture('annotated_video.mp4')  # Replace with 0 for webcam

# Raise instead of calling exit(): exit() would shut down the notebook kernel and
# discard every variable, whereas an exception only stops this cell.
if not cap.isOpened():
    raise IOError("Error: Could not open video file.")

try:
    ret, first_frame = cap.read()

    # Check if a frame was read successfully
    if not ret:
        raise IOError("Error: Could not read frame from video.")

    prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)

    # Note: this renders one inline image per frame, which is a lot of output for long videos.
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Convert current frame to grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Compute dense optical flow using Farneback method
        flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                            pyr_scale=0.5, levels=3, winsize=15,
                                            iterations=3, poly_n=5, poly_sigma=1.2, flags=0)

        # Compute magnitude and angle of 2D vectors
        magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1], angleInDegrees=True)

        # Visualize the direction: hue encodes direction, value encodes speed
        hsv = np.zeros_like(frame)
        hsv[..., 1] = 255

        # Halve the 0–360 degree angle to fit OpenCV's 0–180 hue range for 8-bit images
        hsv[..., 0] = angle / 2

        # Set value according to the optical flow magnitude (normalized)
        hsv[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)

        # Convert HSV to BGR for display
        bgr_flow = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

        # Display the result using cv2_imshow instead of cv2.imshow
        cv2_imshow(bgr_flow)

        # Update previous frame
        prev_gray = gray

        # No cv2.waitKey() key check here: there is no HighGUI window in Colab to
        # receive key presses, so the 'q' shortcut could never trigger.
finally:
    # Release the capture on every path, including errors
    cap.release()

In [None]:
import cv2
import numpy as np
from google.colab.patches import cv2_imshow

# Preview the dense optical flow for the first few frames of the annotated video.
cap = cv2.VideoCapture('annotated_video.mp4')

if cap.isOpened():
    ret, first_frame = cap.read()

    if ret:
        # The flow is computed between consecutive grayscale frames.
        prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)

        frame_count = 0
        max_frames = 30  # Limit to avoid crash (adjust based on need)

        while frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break  # Ran out of frames before reaching the limit

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Farneback dense optical flow between the previous and current frame
            flow = cv2.calcOpticalFlowFarneback(
                prev_gray, gray, None,
                pyr_scale=0.5, levels=3, winsize=15,
                iterations=3, poly_n=5, poly_sigma=1.2, flags=0,
            )
            flow_x, flow_y = flow[..., 0], flow[..., 1]
            magnitude, angle = cv2.cartToPolar(flow_x, flow_y, angleInDegrees=True)

            # Encode direction as hue and speed as brightness, at full saturation
            hsv = np.zeros_like(frame)
            hsv[..., 0] = angle / 2
            hsv[..., 1] = 255
            hsv[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)

            bgr_flow = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

            # Render the flow image inline
            cv2_imshow(bgr_flow)

            prev_gray = gray
            frame_count += 1
    else:
        print("Error: Could not read frame from video.")
else:
    print("Error: Could not open video file.")

cap.release()


In [None]:
import cv2
import numpy as np
import os

# Load video
cap = cv2.VideoCapture('annotated_video.mp4')

# Check if video opened successfully
if not cap.isOpened():
    print("Error: Could not open video file.")
    cap.release()
else:
    # Read the first frame
    ret, first_frame = cap.read()
    if not ret:
        print("Error: Could not read frame from video.")
        cap.release()
    else:
        # Set up optical flow
        prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)

        # Reuse the source frame rate so the flow video plays at the same speed as the
        # input; fall back to 20 FPS when the container does not report one.
        source_fps = cap.get(cv2.CAP_PROP_FPS)
        out_fps = source_fps if source_fps > 0 else 20.0

        # Set up video writer
        height, width = first_frame.shape[:2]
        out_filename = 'optical_flow_output.mp4'
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # For .mp4 format
        out = cv2.VideoWriter(out_filename, fourcc, out_fps, (width, height))

        try:
            if not out.isOpened():
                raise IOError(f"Error: Could not open '{out_filename}' for writing.")

            # Loop over frames
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                # Optical flow calculation (Farneback dense flow)
                flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                                    0.5, 3, 15, 3, 5, 1.2, 0)
                magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1], angleInDegrees=True)

                # HSV image: hue encodes direction, value encodes speed
                hsv = np.zeros_like(frame)
                hsv[..., 1] = 255
                hsv[..., 0] = angle / 2  # Hue
                hsv[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)

                bgr_flow = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

                out.write(bgr_flow)  # Write frame to output video

                prev_gray = gray
        finally:
            # Release resources even if a frame fails, so the output file is finalized
            cap.release()
            out.release()

        print(f"Optical flow video saved as {out_filename}")


Optical flow video saved as optical_flow_output.mp4


In [None]:
from google.colab import files
files.download('optical_flow_output.mp4')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>