In [11]:
import cv2
from datetime import datetime
from ultralytics import YOLO
from utilities import is_inside, draw_detections_on_frame

# Load the YOLO detector from the local weights file 'handDetection.pt'.
# process_video() in this cell calls this module-level object on each sampled frame.
model = YOLO('handDetection.pt')

class State:
    """Integer tags for the two phases of the pickup/drop counting cycle."""

    # 1: waiting for a hand at the pickup region; 2: waiting for the drop.
    WAIT_FOR_PICKUP, WAIT_FOR_DROP = 1, 2

def is_entering_from_side(box, sides, frame_width, frame_height):
    """Tell whether a box's centre sits in the outer quarter of the frame on any listed side.

    Args:
        box: Sequence whose first four items are x1, y1, x2, y2.
        sides: Iterable of side names drawn from 'left', 'right', 'top', 'bottom'.
        frame_width: Frame width used for the left/right thresholds.
        frame_height: Frame height used for the top/bottom thresholds.

    Returns:
        True if the test for at least one listed side passes, otherwise False.

    Raises:
        KeyError: If an unknown side name is reached before any side matches.
    """
    cx = (box[0] + box[2]) / 2
    cy = (box[1] + box[3]) / 2

    # All four tests are evaluated up front; only the requested ones are consulted.
    hits = dict(
        left=cx <= frame_width * 0.25,
        right=cx >= frame_width * 0.75,
        top=cy <= frame_height * 0.25,
        bottom=cy >= frame_height * 0.75,
    )

    for side in sides:
        if hits[side]:
            return True
    return False


def draw_side_of_rectangle(frame, rect_coords, side, color, thickness):
    """Draw one edge of an axis-aligned rectangle onto ``frame`` in place.

    Args:
        frame: Image passed straight to ``cv2.line``.
        rect_coords: Pair ``(p1, p2)`` of opposite corners, each indexable as
            ``[x, y]``. Lists and tuples are both accepted.
        side: One of 'left', 'right', 'top', 'bottom'. Any other value draws
            nothing.
        color: Colour forwarded to ``cv2.line``.
        thickness: Line thickness forwarded to ``cv2.line``.
    """
    p1, p2 = rect_coords

    # Callers in this notebook pass corners as lists (e.g. [[5, 10], [120, 140]]).
    # Some OpenCV builds only accept tuples of ints as points, so normalise here,
    # the same way draw_and_highlight_regions wraps its corners in tuple().
    x1, y1 = int(p1[0]), int(p1[1])
    x2, y2 = int(p2[0]), int(p2[1])

    segments = {
        'left': ((x1, y1), (x1, y2)),
        'right': ((x2, y1), (x2, y2)),
        'top': ((x1, y1), (x2, y1)),
        'bottom': ((x1, y2), (x2, y2)),
    }

    segment = segments.get(side)
    if segment is None:
        # Unknown side name: nothing to draw.
        return
    cv2.line(frame, segment[0], segment[1], color, thickness)


def process_video(video_url, pickup_coords, drop_coords, pickup_sides, drop_sides, output_path):
    """Count pickup -> drop hand cycles in a video and write an annotated copy.

    Roughly five frames per second are sampled. Each sampled frame is run
    through the module-level ``model``, the detections are checked against the
    pickup and drop regions, a small state machine updates the cycle count,
    and the annotated frame is appended to the output video.

    Args:
        video_url: Path or URL handed to ``cv2.VideoCapture``.
        pickup_coords: ``[[x1, y1], [x2, y2]]`` corners of the pickup region.
        drop_coords: ``[[x1, y1], [x2, y2]]`` corners of the drop region.
        pickup_sides: Side names checked with ``is_entering_from_side`` for pickup.
        drop_sides: Side names checked with ``is_entering_from_side`` for drop.
        output_path: Destination file for the annotated video (mp4v codec).

    Raises:
        ValueError: If the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_url)
    if not cap.isOpened():
        raise ValueError(f"Couldn't open video stream from URL: {video_url}")

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        original_fps = int(cap.get(cv2.CAP_PROP_FPS))

        # Sample about 5 frames per second. The stride is clamped to 1 because
        # `original_fps // 5` is 0 for sources below 5 fps (or with an unknown
        # fps), which would make the modulo below raise ZeroDivisionError.
        frame_stride = max(1, original_fps // 5)

        # Only every `frame_stride`-th frame is written, so the writer's frame
        # rate is scaled down to match; otherwise playback is sped up.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, original_fps / frame_stride, (width, height))

        frame_count = 0
        state = State.WAIT_FOR_PICKUP
        count = 0
        hand_was_in_drop = False

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Stream ended.")
                break

            if frame_count % frame_stride == 0:
                results = model(frame, conf=0.05, iou=0.1)
                # Pull the detections to numpy once and reuse them below.
                detections = results[0].boxes.data.cpu().numpy()
                boxes = [list(map(int, det[:4])) for det in detections]

                # is_inside comes from `utilities`; presumably it tests whether
                # the box lies within the region - confirm against that module.
                hand_detected_in_pickup = any(
                    is_inside(box, pickup_coords)
                    and is_entering_from_side(box, pickup_sides, width, height)
                    for box in boxes
                )
                hand_detected_in_drop = any(
                    is_inside(box, drop_coords)
                    and is_entering_from_side(box, drop_sides, width, height)
                    for box in boxes
                )

                # Update states and count
                if state == State.WAIT_FOR_PICKUP and hand_detected_in_pickup:
                    state = State.WAIT_FOR_DROP

                if state == State.WAIT_FOR_DROP and hand_detected_in_drop and not hand_was_in_drop:
                    hand_was_in_drop = True

                # A cycle is counted when the hand leaves the drop region again.
                if hand_was_in_drop and not hand_detected_in_drop:
                    count += 1
                    state = State.WAIT_FOR_PICKUP
                    hand_was_in_drop = False

                # Drawing and annotations on the frame
                for det in detections:
                    frame = draw_detections_on_frame(frame, det, results[0].names)

                overlay_lines = (
                    (f"Count: {count}", 30, 1, 2),
                    (f"Pickup: {hand_detected_in_pickup}", 60, 0.5, 1),
                    (f"Drop: {hand_detected_in_drop}", 90, 0.5, 1),
                    (f"Was in Drop: {hand_was_in_drop}", 120, 0.5, 1),
                    (f"State: {state}", 150, 0.5, 1),
                )
                for text, y, scale, weight in overlay_lines:
                    cv2.putText(frame, text, (width - 150, y), cv2.FONT_HERSHEY_SIMPLEX,
                                scale, (0, 255, 0), weight)

                cv2.rectangle(frame, tuple(pickup_coords[0]), tuple(pickup_coords[1]), (0, 255, 0), 2)
                cv2.rectangle(frame, tuple(drop_coords[0]), tuple(drop_coords[1]), (0, 255, 0), 2)

                # Highlight the watched sides after the green rectangles so the
                # red edges are not painted over.
                for side in pickup_sides:
                    draw_side_of_rectangle(frame, pickup_coords, side, (0, 0, 255), 2)
                for side in drop_sides:
                    draw_side_of_rectangle(frame, drop_coords, side, (0, 0, 255), 2)

                out.write(frame)  # Save the frame to the output video file

            frame_count += 1
    finally:
        # Release the capture and writer even if inference or drawing raised.
        cap.release()
        if out is not None:
            out.release()


In [13]:

# # Example usage
# video_url = 'videos/testVideo2.mp4'
# pickup_coords = [[5, 10], [120, 140]]
# drop_coords = [[240, 60], [450, 190]]
# pickup_sides = ['right', 'bottom']
# drop_sides = ['bottom']
# output_path = 'output/output.mp4'

# process_video(video_url, pickup_coords, drop_coords, pickup_sides, drop_sides, output_path)


In [14]:
# Input clip and destination of the annotated video.
video_url, output_path = 'videos/testVideo2.mp4', 'output/output.mp4'

# Regions of interest, each given as two corner points [[x1, y1], [x2, y2]].
pickup_coords = [[5, 10], [120, 140]]
drop_coords = [[240, 60], [450, 190]]

# Side names checked for each region.
pickup_sides, drop_sides = ['right', 'bottom'], ['bottom']

In [40]:
import cv2
from ultralytics import YOLO
from utilities import is_inside, draw_detections_on_frame

In [33]:
# Load the YOLO detector from 'handDetection.pt'; process_video() in this cell
# calls this module-level object on every sampled frame.
model = YOLO('handDetection.pt')

class State:
    """Phase markers for the counting state machine used by process_video."""

    # Plain ints: 1 while waiting for a pickup, 2 while waiting for the drop.
    WAIT_FOR_PICKUP, WAIT_FOR_DROP = 1, 2

# Function to determine if a box enters or exits from a specified side
def is_entering_from_side(box, sides, frame_width, frame_height):
    """Check whether the box centre falls in the outer 25% band of the frame on any given side.

    Args:
        box: Sequence starting with x1, y1, x2, y2.
        sides: Iterable of 'left' / 'right' / 'top' / 'bottom'.
        frame_width: Width the horizontal thresholds are derived from.
        frame_height: Height the vertical thresholds are derived from.

    Returns:
        bool: True when any requested side's band contains the centre.

    Raises:
        KeyError: For a side name outside the four known ones (if reached).
    """
    mid_x = (box[0] + box[2]) / 2
    mid_y = (box[1] + box[3]) / 2

    near_edge = {}
    near_edge['left'] = mid_x <= frame_width * 0.25
    near_edge['right'] = mid_x >= frame_width * 0.75
    near_edge['top'] = mid_y <= frame_height * 0.25
    near_edge['bottom'] = mid_y >= frame_height * 0.75

    return any(map(near_edge.__getitem__, sides))

# Function to draw and highlight regions
def draw_and_highlight_regions(frame, coords, sides, roiColor, sideColor, thickness=2, highlight_thickness=3):
    """Outline a rectangular region on ``frame`` and re-draw selected edges in a second colour.

    Args:
        frame: Image drawn on in place via OpenCV.
        coords: Two corner points, ``[[x1, y1], [x2, y2]]``.
        sides: Iterable of edge names ('left', 'right', 'top', 'bottom');
            unrecognised names are skipped.
        roiColor: Colour of the full rectangle.
        sideColor: Colour of the highlighted edges.
        thickness: Line thickness of the rectangle.
        highlight_thickness: Line thickness of the highlighted edges.
    """
    first_corner, second_corner = coords[0], coords[1]
    cv2.rectangle(frame, tuple(first_corner), tuple(second_corner), roiColor, thickness)

    x_a, y_a = first_corner[0], first_corner[1]
    x_b, y_b = second_corner[0], second_corner[1]
    edge_segments = (
        ('left', (x_a, y_a), (x_a, y_b)),
        ('right', (x_b, y_a), (x_b, y_b)),
        ('top', (x_a, y_a), (x_b, y_a)),
        ('bottom', (x_a, y_b), (x_b, y_b)),
    )

    # One line per entry in `sides`, in the order the caller listed them.
    for side in sides:
        for edge_name, start, end in edge_segments:
            if side == edge_name:
                cv2.line(frame, start, end, sideColor, highlight_thickness)
                break

def process_video(video_url, pickup_coords, drop_coords, pickup_sides, drop_sides, out_frames):
    """Count pickup -> drop hand cycles and write an annotated video.

    Roughly five frames per second are sampled, run through the module-level
    ``model``, and fed into a two-state machine: a hand on a pickup side arms
    the cycle, and a hand appearing on and then leaving a drop side completes
    it and increments the count.

    Args:
        video_url: Path or URL handed to ``cv2.VideoCapture``.
        pickup_coords: ``[[x1, y1], [x2, y2]]`` corners of the pickup region (drawn only).
        drop_coords: ``[[x1, y1], [x2, y2]]`` corners of the drop region (drawn only).
        pickup_sides: Side names passed to ``is_entering_from_side`` for pickup.
        drop_sides: Side names passed to ``is_entering_from_side`` for drop.
        out_frames: Destination path of the annotated video. Callers in this
            notebook pass their output path in this position.

    Raises:
        ValueError: If the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_url)
    if not cap.isOpened():
        raise ValueError(f"Couldn't open video stream from URL: {video_url}")

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        original_fps = int(cap.get(cv2.CAP_PROP_FPS))

        # Clamp the stride to 1: `original_fps // 5` is 0 for sources under
        # 5 fps (or with an unknown fps) and would raise ZeroDivisionError.
        frame_stride = max(1, original_fps // 5)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # or 'XVID'
        # Write to the path given as an argument rather than whatever global
        # `output_path` happens to exist in the kernel. The frame rate is
        # scaled by the stride because only sampled frames are written.
        out = cv2.VideoWriter(out_frames, fourcc, original_fps / frame_stride, (width, height))

        frame_count = 0
        state = State.WAIT_FOR_PICKUP
        count = 0
        hand_was_in_drop = False

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Stream ended.")
                break

            if frame_count % frame_stride == 0:  # limiting to ~5 fps
                results = model(frame, conf=0.05, iou=0.2)
                # Convert once and reuse for both checks and for drawing.
                detections = results[0].boxes.data.cpu().numpy()
                boxes = [list(map(int, det[:4])) for det in detections]

                hand_detected_in_pickup = any(
                    is_entering_from_side(box, pickup_sides, width, height) for box in boxes
                )
                hand_detected_in_drop = any(
                    is_entering_from_side(box, drop_sides, width, height) for box in boxes
                )

                # State and count update logic
                if state == State.WAIT_FOR_PICKUP and hand_detected_in_pickup:
                    state = State.WAIT_FOR_DROP

                if state == State.WAIT_FOR_DROP and hand_detected_in_drop:
                    hand_was_in_drop = True

                # The cycle completes when the hand is no longer on a drop side.
                if hand_was_in_drop and not hand_detected_in_drop:
                    count += 1
                    state = State.WAIT_FOR_PICKUP
                    hand_was_in_drop = False

                for det in detections:
                    frame = draw_detections_on_frame(frame, det, results[0].names)

                overlay_lines = (
                    (f"Count: {count}", 30, 1, 2),
                    (f"Pickup: {hand_detected_in_pickup}", 60, 0.5, 1),
                    (f"Drop: {hand_detected_in_drop}", 90, 0.5, 1),
                    (f"Was in Drop: {hand_was_in_drop}", 120, 0.5, 1),
                    (f"State: {state}", 150, 0.5, 1),
                )
                for text, y, scale, weight in overlay_lines:
                    cv2.putText(frame, text, (width - 150, y), cv2.FONT_HERSHEY_SIMPLEX,
                                scale, (0, 255, 0), weight)

                # draw_and_highlight_regions already draws the green rectangle,
                # so no separate cv2.rectangle calls are needed.
                draw_and_highlight_regions(frame, pickup_coords, pickup_sides, (0, 255, 0), (0, 0, 255))
                draw_and_highlight_regions(frame, drop_coords, drop_sides, (0, 255, 0), (0, 0, 255))

                out.write(frame)  # Save the frame to the output video file

            frame_count += 1

    finally:
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

In [38]:
# Load the YOLO detector from 'handDetection.pt'.
# NOTE: process_video() in this cell builds its own local `model`, so this
# module-level instance is not the one it uses.
model = YOLO('handDetection.pt')
class State:
    """Two integer phase tags: waiting for pickup (1) and waiting for drop (2)."""

    WAIT_FOR_PICKUP, WAIT_FOR_DROP = 1, 2

def is_entering_from_side(box, region, sides):
    """Check whether the box centre lies at least a quarter of the region's size away from the region's centre, toward any listed side.

    Args:
        box: Sequence starting with x1, y1, x2, y2.
        region: ``[[x1, y1], [x2, y2]]`` corners of the region.
        sides: Iterable of 'left' / 'right' / 'top' / 'bottom'.

    Returns:
        bool: True if the test for any requested side passes.

    Raises:
        KeyError: For an unknown side name (if reached before a match).
    """
    left_x, top_y = region[0][0], region[0][1]
    right_x, bottom_y = region[1][0], region[1][1]

    box_cx = (box[0] + box[2]) / 2
    box_cy = (box[1] + box[3]) / 2

    # Offsets are a quarter of the region's extent, measured from its centre.
    quarter_w = (right_x - left_x) * 0.25
    quarter_h = (bottom_y - top_y) * 0.25
    region_cx = (left_x + right_x) / 2
    region_cy = (top_y + bottom_y) / 2

    side_hit = dict(
        left=box_cx <= region_cx - quarter_w,
        right=box_cx >= region_cx + quarter_w,
        top=box_cy <= region_cy - quarter_h,
        bottom=box_cy >= region_cy + quarter_h,
    )

    for side in sides:
        if side_hit[side]:
            return True
    return False

def process_video(video_url, pickup_coords, drop_coords, pickup_sides, drop_sides, output_path):
    """Annotate every frame of a video with per-detection pickup/drop flags.

    A YOLO model is loaded from 'handDetection.pt' on each call. Every
    detection is boxed in green and labelled with the result of
    ``is_entering_from_side`` for the pickup and drop regions, and every
    frame is written to ``output_path``.

    Args:
        video_url: Path or URL handed to ``cv2.VideoCapture``.
        pickup_coords: ``[[x1, y1], [x2, y2]]`` corners of the pickup region.
        drop_coords: ``[[x1, y1], [x2, y2]]`` corners of the drop region.
        pickup_sides: Side names checked for the pickup region.
        drop_sides: Side names checked for the drop region.
        output_path: Destination file for the annotated video.

    Raises:
        ValueError: If the video source cannot be opened.
    """
    model = YOLO('handDetection.pt')

    capture = cv2.VideoCapture(video_url)
    if not capture.isOpened():
        raise ValueError(f"Couldn't open video stream from URL: {video_url}")

    frame_size = (
        int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)),
    )
    source_fps = capture.get(cv2.CAP_PROP_FPS)
    writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), source_fps, frame_size)

    font = cv2.FONT_HERSHEY_SIMPLEX
    try:
        while True:
            grabbed, frame = capture.read()
            if not grabbed:
                break

            predictions = model(frame, conf=0.05, iou=0.2)

            for detection in predictions[0].boxes.data.cpu().numpy():
                box = [int(value) for value in detection[:4]]
                entering_pickup = is_entering_from_side(box, pickup_coords, pickup_sides)
                entering_drop = is_entering_from_side(box, drop_coords, drop_sides)

                # Two stacked labels just above the box, then the box itself.
                cv2.putText(frame, f"Entering Pickup: {entering_pickup}", (box[0], box[1]-10),
                            font, 0.5, (255, 0, 0), 2)
                cv2.putText(frame, f"Entering Drop: {entering_drop}", (box[0], box[1]-25),
                            font, 0.5, (0, 0, 255), 2)
                cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 2)

            writer.write(frame)

    finally:
        capture.release()
        writer.release()
        cv2.destroyAllWindows()

# Example usage
# Source clip and where the annotated copy is written.
video_url, output_path = 'videos/testVideo2.mp4', 'output/test_output.mp4'

# ROI corner coordinates for the pickup and drop regions.
pickup_coords = [[5, 10], [120, 140]]
drop_coords = [[240, 60], [450, 190]]

# Sides to check for each region.
pickup_sides, drop_sides = ["right", "bottom"], ["bottom"]

process_video(
    video_url,
    pickup_coords,
    drop_coords,
    pickup_sides,
    drop_sides,
    output_path,
)


In [35]:

# Example usage
# Input clip and output file.
video_url = 'videos/testVideo2.mp4'
output_path = 'output/output.mp4'

# Regions as [[x1, y1], [x2, y2]] and the sides checked for each.
pickup_coords, pickup_sides = [[5, 10], [120, 140]], ['right', 'bottom']
drop_coords, drop_sides = [[240, 60], [450, 190]], ['bottom']

# Arguments are passed positionally, in the order process_video declares them.
process_video(
    video_url,
    pickup_coords,
    drop_coords,
    pickup_sides,
    drop_sides,
    output_path,
)


In [None]:
# Function to draw and highlight regions
def draw_and_highlight_regions(frame, coords, sides, roiColor, sideColor, thickness=2, highlight_thickness=3):
    """Draw a region's rectangle on ``frame`` and overdraw the edges named in ``sides``.

    Args:
        frame: Image drawn on in place via OpenCV.
        coords: Two corner points, ``[[x1, y1], [x2, y2]]``.
        sides: Container tested with ``in`` for 'left', 'right', 'top', 'bottom'.
        roiColor: Colour of the full rectangle.
        sideColor: Colour of the highlighted edges.
        thickness: Line thickness of the rectangle.
        highlight_thickness: Line thickness of the highlighted edges.
    """
    corner_a, corner_b = coords[0], coords[1]
    cv2.rectangle(frame, tuple(corner_a), tuple(corner_b), roiColor, thickness)

    x_a, y_a = corner_a[0], corner_a[1]
    x_b, y_b = corner_b[0], corner_b[1]

    # Edges are always drawn in this fixed order, whatever the order in `sides`.
    edge_segments = (
        ('left', (x_a, y_a), (x_a, y_b)),
        ('right', (x_b, y_a), (x_b, y_b)),
        ('top', (x_a, y_a), (x_b, y_a)),
        ('bottom', (x_a, y_b), (x_b, y_b)),
    )
    for edge_name, start, end in edge_segments:
        if edge_name in sides:
            cv2.line(frame, start, end, sideColor, highlight_thickness)

# Initialize video capture and writer
# Open the source clip and an mp4v writer with the same frame size.
cap = cv2.VideoCapture(video_url)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
frame_size = (
    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
)
# Write at the source frame rate so playback speed matches the input.
# Fall back to the previous fixed 20.0 when the backend reports 0.
source_fps = cap.get(cv2.CAP_PROP_FPS) or 20.0
out = cv2.VideoWriter(output_path, fourcc, source_fps, frame_size)

# Process video
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Both regions get a green outline with their watched sides in red.
        draw_and_highlight_regions(frame, pickup_coords, pickup_sides, (0, 255, 0), (0, 0, 255))
        draw_and_highlight_regions(frame, drop_coords, drop_sides, (0, 255, 0), (0, 0, 255))

        out.write(frame)
finally:
    # Release resources even if drawing or writing fails part-way through.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

In [1]:
import cv2
from ultralytics import YOLO

In [4]:
# Load the YOLO detector from 'handDetection.pt' and move it to the CPU.
# process_video() in this cell calls this module-level object on every frame.
model = YOLO('handDetection.pt').to('cpu')

def is_entering_from_side(box, region, sides):
    """Check whether the box centre lies strictly outside the region on any listed side.

    Args:
        box: Sequence starting with x1, y1, x2, y2.
        region: ``[[x1, y1], [x2, y2]]`` corners of the region.
        sides: Iterable of 'left' / 'right' / 'top' / 'bottom'.

    Returns:
        bool: True if, for at least one requested side, the centre is beyond
        that edge of the region. A centre exactly on an edge does not count.

    Raises:
        KeyError: For an unknown side name (if reached before a match).
    """
    x_center, y_center = (box[0] + box[2]) / 2, (box[1] + box[3]) / 2

    # Only the region's edges matter here; its width, height and centre were
    # computed previously but never used, so they have been dropped.
    entering_from_sides = {
        'left': x_center < region[0][0],    # centre is left of the region
        'right': x_center > region[1][0],   # centre is right of the region
        'top': y_center < region[0][1],     # centre is above the region
        'bottom': y_center > region[1][1],  # centre is below the region
    }
    return any(entering_from_sides[side] for side in sides)

def process_video(video_url, pickup_coords, drop_coords, pickup_sides, drop_sides, output_path):
    """Write a copy of the video with per-frame pickup/drop "entering" flags overlaid.

    Each frame is run through the module-level ``model``. The overlay shows
    whether any detection satisfies ``is_entering_from_side`` for the pickup
    region and for the drop region.

    Args:
        video_url: Path or URL handed to ``cv2.VideoCapture``.
        pickup_coords: ``[[x1, y1], [x2, y2]]`` corners of the pickup region.
        drop_coords: ``[[x1, y1], [x2, y2]]`` corners of the drop region.
        pickup_sides: Side names checked for the pickup region.
        drop_sides: Side names checked for the drop region.
        output_path: Destination file for the annotated video.

    Raises:
        ValueError: If the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_url)
    if not cap.isOpened():
        raise ValueError(f"Couldn't open video stream from URL: {video_url}")

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            results = model(frame, conf=0.05, iou=0.2)
            boxes = [list(map(int, det[:4])) for det in results[0].boxes.data.cpu().numpy()]

            # Aggregate over all detections and draw each label once. Drawing
            # one label per detection at the same position made the text
            # overprint itself whenever more than one hand was detected.
            entering_pickup = any(
                is_entering_from_side(box, pickup_coords, pickup_sides) for box in boxes
            )
            entering_drop = any(
                is_entering_from_side(box, drop_coords, drop_sides) for box in boxes
            )

            cv2.putText(frame, f"entering_pickup: {entering_pickup}", (width - 150, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
            cv2.putText(frame, f"entering_drop: {entering_drop}", (width - 150, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

            out.write(frame)

    finally:
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

# Example usage
# Source clip and destination for the annotated copy.
video_url, output_path = 'videos/testVideo2.mp4', 'output/output.mp4'

# Region corners as [[x1, y1], [x2, y2]].
pickup_coords = [[5, 10], [120, 140]]
drop_coords = [[240, 60], [450, 190]]

# Sides checked for each region.
pickup_sides = ['right', 'bottom']
drop_sides = ['bottom']

process_video(
    video_url,
    pickup_coords,
    drop_coords,
    pickup_sides,
    drop_sides,
    output_path,
)


0: 384x640 1 Human hand, 26.5ms
Speed: 2.2ms preprocess, 26.5ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Human hand, 28.4ms
Speed: 1.2ms preprocess, 28.4ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Human hand, 26.8ms
Speed: 1.3ms preprocess, 26.8ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Human hand, 26.9ms
Speed: 1.2ms preprocess, 26.9ms inference, 0.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Human hand, 26.1ms
Speed: 1.2ms preprocess, 26.1ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Human hand, 26.2ms
Speed: 1.2ms preprocess, 26.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Human hand, 27.2ms
Speed: 1.2ms preprocess, 27.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 Human hands, 26.3ms
Speed: 1.2ms preprocess, 26.3ms inference, 0.4ms postpro