In [82]:
import cv2
import numpy as np
from IPython.display import display as disp, Image as IPyImage, clear_output
import time
from ultralytics import YOLO
import os
from PIL import Image
import shutil
import matplotlib.pyplot as plt

## Draw rectangle

In [2]:
import cv2

# Copy a video to a new file with a fixed blue rectangle drawn on every frame.
# Each frame is also previewed in an OpenCV window; pressing Q stops early.

# Define the source video and the output file
input_video_path = 'videos/testVideo2.mp4'
output_video_path = 'output/drawRectangle.mp4'

# Create a VideoCapture object
cap = cv2.VideoCapture(input_video_path)

# Abort the cell if the video could not be opened. An exception is raised
# instead of calling exit(), because exit() inside a notebook shuts the
# whole kernel down rather than just stopping this cell.
if not cap.isOpened():
    raise RuntimeError("Error: Could not open video.")

# Get the video frame width and height
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Reuse the source frame rate so the output plays at the original speed.
# Fall back to the previous hard-coded 20 fps when the container does not
# report a usable rate (0 or NaN).
source_fps = cap.get(cv2.CAP_PROP_FPS)
output_fps = source_fps if source_fps > 0 else 20.0

# Define the codec and create a VideoWriter object to write the video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Use 'XVID' if saving as .avi
out = cv2.VideoWriter(output_video_path, fourcc, output_fps, (frame_width, frame_height))

# VideoWriter does not raise when it cannot create the file (for example
# when the output directory is missing), so check explicitly.
if not out.isOpened():
    cap.release()
    raise RuntimeError(f"Error: Could not open '{output_video_path}' for writing.")

# Define the rectangle properties (start_point, end_point, color, and thickness)
start_point = (10, 10)  # top-left corner of the rectangle
end_point = (200, 200)  # bottom-right corner of the rectangle
color = (255, 0, 0)  # Blue color in BGR
thickness = 2  # Line thickness

try:
    # Read each frame from the video
    while cap.isOpened():
        ret, frame = cap.read()

        # ret is False once no further frame can be read
        if not ret:
            break

        # Draw the rectangle on the current frame
        cv2.rectangle(frame, start_point, end_point, color, thickness)

        # Write the frame into the output video file
        out.write(frame)

        # Display the resulting frame (optional)
        cv2.imshow('Frame', frame)

        # Press Q on keyboard to exit before the video ends (optional)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release everything when done, even if the loop raised, so the
    # output file is finalized and the preview window is closed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print("The video was processed and saved successfully!")


The video was processed and saved successfully!


## ROI

In [83]:
def display_video_with_rectangles(video_path, rect1_coords, rect2_coords=None):
    """Play a video inline in the notebook with region-of-interest rectangles drawn on it.

    Each frame is JPEG-encoded and shown through IPython's display machinery;
    the previous frame is cleared as the next one arrives, so the output area
    behaves like a video player.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.
        rect1_coords: ``((x1, y1), (x2, y2))`` corner points of the first
            rectangle, drawn with colour ``(0, 255, 0)``.
        rect2_coords: Optional corner points of a second rectangle, drawn with
            colour ``(0, 0, 255)``. Pass ``None`` (the default) to draw only
            the first rectangle.

    Returns:
        None. Prints an error message and returns early if the video cannot
        be opened.
    """
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    try:
        while True:
            ret, frame = cap.read()

            if not ret:
                print("Stream ended.")
                break

            # Draw rectangles
            cv2.rectangle(frame, rect1_coords[0], rect1_coords[1], (0, 255, 0), 2)
            if rect2_coords is not None:
                cv2.rectangle(frame, rect2_coords[0], rect2_coords[1], (0, 0, 255), 2)

            # Convert to JPEG; skip the frame if encoding failed rather than
            # handing an invalid buffer to the display call.
            encoded_ok, buffer = cv2.imencode('.jpg', frame)
            if not encoded_ok:
                continue

            # Display the frame. clear_output(wait=True) defers the clear
            # until the next output arrives, which avoids flicker.
            disp(IPyImage(data=buffer.tobytes()))
            clear_output(wait=True)

            # Sleep to slow down the loop (0.04 s per frame, i.e. at most 25 fps)
            time.sleep(0.04)
    finally:
        cap.release()
        cv2.destroyAllWindows()


In [87]:
video_path = "sample/testVideo.mp4"
rect1_coords = ((240, 60), (450, 190))  # Top-left and bottom-right corners for the first rectangle

# The second rectangle is currently disabled. The name still has to be bound
# because it is passed to the call below; leaving it defined only in a comment
# makes this cell fail with NameError on a fresh kernel.
rect2_coords = None
# rect2_coords = ((670, 550), (950, 600)) # Top-left and bottom-right corners for the second rectangle

display_video_with_rectangles(video_path, rect1_coords, rect2_coords)

Stream ended.


## ROI and detection

In [77]:
# Initialize the YOLO model from the 'handDetection.pt' weights file.
# It is created once at module level; display_video_with_detections_and_rectangles
# below reads this global `model` for every frame instead of reloading it.
model = YOLO('handDetection.pt')

def display_video_with_detections_and_rectangles(video_path, rect1_coords, output_path):
    """Annotate every frame of a video with model detections and save the result.

    Despite the name, nothing is displayed: each frame is passed to the global
    ``model``, every detection row is drawn with ``draw_detections_on_frame``,
    the predefined rectangle is drawn on top, and the frame is written to
    ``output_path``.

    Args:
        video_path: Path of the input video to open with ``cv2.VideoCapture``.
        rect1_coords: ``((x1, y1), (x2, y2))`` corner points of the rectangle
            drawn on every frame with colour ``(0, 255, 0)``.
        output_path: Path of the video file to write (``mp4v`` codec).

    Returns:
        None. Prints an error message and returns early if the input video or
        the output file cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    out = None
    try:
        # Get the video width, height, and frames per second
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the frame rate as a float: truncating to int turns e.g. 29.97
        # into 29 and changes the playback speed of the output. Some sources
        # report 0 (or NaN); fall back to 25 fps in that case.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 25.0

        # Define the codec and create a VideoWriter object to save the output video
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        # VideoWriter does not raise when it cannot create the file, so check
        # explicitly instead of silently producing no output.
        if not out.isOpened():
            print("Error: Could not open output video for writing.")
            return

        while True:
            ret, frame = cap.read()

            if not ret:
                print("Stream ended.")
                break

            # Run inference on the frame
            detections = model(frame)[0]

            # Loop through all detections and draw them
            for box_data in detections.boxes.data.cpu().numpy():
                frame = draw_detections_on_frame(frame, box_data, detections.names)

            # Draw predefined rectangle
            cv2.rectangle(frame, rect1_coords[0], rect1_coords[1], (0, 255, 0), 2)

            # Save the frame to the output video. No per-frame sleep here:
            # nothing is shown live, so throttling only slows processing down.
            out.write(frame)
    finally:
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

# Draw a single detection (bounding box plus caption) onto a frame
def draw_detections_on_frame(frame, box_data, names):
    """Draw one detection box and its "<class name> <confidence>" caption on a frame.

    Args:
        frame: Image array to draw on; it is modified in place.
        box_data: Sequence of six values unpacked as
            ``x1, y1, x2, y2, confidence, class index``.
        names: Lookup from integer class index to class name.

    Returns:
        The same ``frame`` object, after drawing.
    """
    left, top, right, bottom, score, class_id = box_data
    caption = f'{names[int(class_id)]} {score:.2f}'
    box_color = [255, 0, 0]

    # Line width scales with the frame size; the caption uses a slightly thinner stroke.
    line_width = round(0.002 * (frame.shape[0] + frame.shape[1]) / 2) + 1
    text_width = max(line_width - 1, 1)
    font_scale = text_width / 3

    # Bounding box outline
    top_left = (int(left), int(top))
    bottom_right = (int(right), int(bottom))
    cv2.rectangle(frame, top_left, bottom_right, box_color, thickness=line_width)

    # Filled caption background, placed directly above the box's top-left corner
    caption_w, caption_h = cv2.getTextSize(
        caption, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=text_width
    )[0]
    caption_corner = (top_left[0] + caption_w, top_left[1] - caption_h - 3)
    cv2.rectangle(frame, top_left, caption_corner, box_color, cv2.FILLED)

    # Caption text on top of the filled background
    cv2.putText(
        frame,
        caption,
        (top_left[0], top_left[1] - 2),
        cv2.FONT_HERSHEY_SIMPLEX,
        font_scale,
        [225, 255, 255],
        thickness=text_width,
        lineType=cv2.LINE_AA,
    )
    return frame

# Usage: annotate the sample video with detections plus one fixed rectangle
# and write the result to the output path.
# NOTE: this rebinds the module-level names `video_path` and `rect1_coords`,
# which the earlier ROI cell also assigns; re-run that cell's assignments
# before calling its function again.
video_path = "sample/testVideo.mp4"
output_path = "output/testVideo.mp4"
# Alternative rectangle, kept for reference (currently disabled):
# rect1_coords = ((1050, 1550), (1830, 2500))  # Top-left and bottom-right corners for the first rectangle
rect1_coords = ((700, 400), (1100, 700))  # Top-left and bottom-right corners of the rectangle

# Prints "Stream ended." once every frame has been processed.
display_video_with_detections_and_rectangles(video_path, rect1_coords, output_path)



0: 384x640 3 Human hands, 5.5ms
Speed: 5.4ms preprocess, 5.5ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Human hands, 6.2ms
Speed: 2.9ms preprocess, 6.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Human hands, 6.0ms
Speed: 4.0ms preprocess, 6.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Human hands, 5.5ms
Speed: 2.7ms preprocess, 5.5ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Human hands, 6.0ms
Speed: 3.6ms preprocess, 6.0ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Human hands, 5.3ms
Speed: 2.6ms preprocess, 5.3ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 Human hands, 6.0ms
Speed: 3.2ms preprocess, 6.0ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 Human hands, 5.4ms
Speed: 3.2ms preprocess, 5.4ms inference, 1.0ms postprocess per 

Stream ended.


## Count Cycles

In [3]:
# import cv2
# import time
# from ultralytics import YOLO
# from utilities import is_inside, draw_detections_on_frame

# class State:
#     WAIT_FOR_PICKUP = 1
#     WAIT_FOR_DROP = 2

# # Initialize the YOLO model
# model = YOLO('handDetection.pt')

# def display_video_with_detections_and_rectangles(video_path, rect1_coords, pickup_coords, drop_coords, output_path):
#     cap = cv2.VideoCapture(video_path)
#     if not cap.isOpened():
#         print("Error: Could not open video.")
#         return

#     # Define video properties
#     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
#     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
#     fps = int(cap.get(cv2.CAP_PROP_FPS))
#     reduced_fps = 5  # process at 5 fps
#     frames_to_skip = max(1, fps // reduced_fps)

#     # State variables
#     state = State.WAIT_FOR_PICKUP
#     count = 0
#     hand_was_in_drop = False

#     # Initialize video writer
#     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
#     out = cv2.VideoWriter(output_path, fourcc, reduced_fps, (width, height))

#     frame_count = 0

#     try:
#         while True:
#             ret, frame = cap.read()
#             if not ret:
#                 print("Stream ended.")
#                 break

#             if frame_count % frames_to_skip == 0:  # Skip frames to process at reduced fps
#                 # Run inference on the frame
#                 results = model(frame, conf=0.05, iou=0.5)
#                 hand_detected_in_pickup = any(is_inside(map(int, box_data[:4]), pickup_coords) for box_data in results[0].boxes.data.cpu().numpy())
#                 hand_detected_in_drop = any(is_inside(map(int, box_data[:4]), drop_coords) for box_data in results[0].boxes.data.cpu().numpy())

#                 # State machine logic
#                 if state == State.WAIT_FOR_PICKUP and hand_detected_in_pickup:
#                     state = State.WAIT_FOR_DROP

#                 if state == State.WAIT_FOR_DROP and hand_detected_in_drop:
#                     hand_was_in_drop = True

#                 if hand_was_in_drop and not hand_detected_in_drop and hand_detected_in_pickup:
#                     count += 1
#                     state = State.WAIT_FOR_DROP
#                     hand_was_in_drop = False

#                 # Draw detections and update the frame
#                 for box_data in results[0].boxes.data.cpu().numpy():
#                     frame = draw_detections_on_frame(frame, box_data, results[0].names)

#                 # Draw additional information on the frame
#                 cv2.putText(frame, f"Count: {count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

#                 # Draw predefined rectangle for display
#                 # cv2.rectangle(frame, rect1_coords[0], rect1_coords[1], (0, 255, 0), 2)
#                 # Draw pickup and drop rectangles
#                 cv2.rectangle(frame, pickup_coords[0], pickup_coords[1], (0, 225, 0), 2)
#                 cv2.rectangle(frame, drop_coords[0], drop_coords[1], (0, 0, 255), 2)

#                 # Write the frame to the output video
#                 out.write(frame)
            
#             # Increment frame count
#             frame_count += 1

#             # Sleep to simulate reduced processing
#             time.sleep(0.04)

#     finally:
#         cap.release()
#         out.release()
#         cv2.destroyAllWindows()

# # Usage
# video_path = "videos/testVideo2.mp4"
# output_path = "output/testVideo2.mp4"
# rect1_coords = ((700, 400), (1100, 700))
# pickup_coords = ((100, 100), (300, 300))  # Define your own pickup coordinates
# drop_coords = ((500, 400), (700, 600))    # Define your own drop coordinates

# display_video_with_detections_and_rectangles(video_path, rect1_coords, pickup_coords, drop_coords, output_path)
