# Testing a Live Stream Video #

## Packages ##

In [1]:
from ultralytics import YOLO
import cv2
import numpy as np

## Load the model ##

In [9]:
# Load the YOLO model from 'best2.pt'. `model` is a module-level global that
# process_frame() reads when it calls model.track().
model = YOLO('best2.pt')

## Video Setup ##

In [10]:
def setup_video_capture(video_path, output_path='5_new_coloredROVFeed2.avi', fps=10):
    """
    Open a video source for reading and create a writer for the processed output.

    Args:
    video_path (str): Path to the video file.
    output_path (str): File the processed video is written to.
        Defaults to '5_new_coloredROVFeed2.avi'.
    fps (float): Frame rate of the output video. Defaults to 10.

    Returns:
    cap: VideoCapture object.
    size: Tuple containing the width and height of the video frames.
    result: VideoWriter object for saving the processed video.
    """
    cap = cv2.VideoCapture(video_path)

    # Use the named property constants rather than the bare indices 3 and 4.
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    size = (frame_width, frame_height)

    # The writer is created with the source frame size and the MJPG fourcc.
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    result = cv2.VideoWriter(output_path, fourcc, fps, size)

    return cap, size, result

## Image Preprocessing ##

In [11]:
def image_preprocessing(frame, block_size=61, c=5):
    """
    Preprocess a frame by converting it to grayscale, applying adaptive mean
    thresholding, and converting the thresholded image back to 3-channel BGR.

    Cropping to a region of interest and rotating the image are optional steps
    that are left commented out in the body.

    Args:
    frame (ndarray): Input video frame in BGR format.
    block_size (int): Neighbourhood size passed to cv2.adaptiveThreshold.
        Defaults to 61.
    c (float): Constant passed to cv2.adaptiveThreshold (subtracted from the
        neighbourhood mean). Defaults to 5.

    Returns:
    ndarray: The thresholded image converted back to BGR format.
    """
    # Convert the image to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Apply adaptive thresholding (tune block_size / c to the lighting conditions)
    thresholded = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, block_size, c
    )

    # Convert the thresholded single-channel image back to BGR format
    gray_img_3channel = cv2.cvtColor(thresholded, cv2.COLOR_GRAY2BGR)

    # Crops the parts of the gripper that confuse the model, used with the
    # thresholding (uncomment if needed)
    # roi = gray_img_3channel[0:435, 220:640]

    # Rotate the image 90 degrees clockwise (uncomment if needed)
    # rotated = cv2.rotate(gray_img_3channel, cv2.ROTATE_90_CLOCKWISE)

    return gray_img_3channel

## Process a single frame ##

In [12]:
def process_frame(frame, result):
    """
    Process a single video frame using the YOLO model to perform object tracking,
    then write the annotated frame to a video file and display a half-size preview.

    Args:
    frame (ndarray): The input video frame in BGR format.
    result (cv2.VideoWriter): VideoWriter object to write the processed frames to a video file.

    Returns:
    None
    """
    # Perform object tracking on the frame using the module-level YOLO model;
    # persist=True is passed so tracking carries over between calls.
    results = model.track(frame, persist=True)

    # Plot the tracking results on the frame
    annotated = results[0].plot()

    # Write the full-size annotated frame to the output video file
    result.write(annotated)

    # Resize the annotated frame to half size for display purposes
    smaller_frame = cv2.resize(annotated, (0, 0), fx=0.5, fy=0.5)

    # Display the resized frame (previously the resize result was computed
    # but the full-size frame was shown instead)
    cv2.imshow('frame', smaller_frame)

## Loop through the frames of the live stream ##

In [13]:
def main(video_path):
    """
    Main function to set up video capture, process the video, and save the output.

    Frames are read until the source is exhausted or the user presses 'q'.
    The capture, the writer and any OpenCV windows are released even if
    processing a frame raises.

    Args:
    video_path (str): Path to the video file.

    Returns:
    None
    """
    # Set up video capture and video writer (the frame size is not needed here)
    cap, _size, result = setup_video_capture(video_path)

    try:
        # Process the video frame by frame
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Preprocess the frame (uncomment if needed)
            # processed_frame = image_preprocessing(frame)

            # Process the frame with object tracking and save the result
            process_frame(frame, result)

            # Stop early when the user presses 'q'
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release resources, including when an exception interrupts the loop
        cap.release()
        result.release()
        cv2.destroyAllWindows()

# Script entry point: run the tracking pipeline on the sample test video.
if __name__ == "__main__":
    # Path to the input video passed to main()
    video_path = 'test_videos/green-square_5.mp4'
    main(video_path)


0: 480x640 (no detections), 94.8ms
Speed: 4.6ms preprocess, 94.8ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 10.6ms
Speed: 1.2ms preprocess, 10.6ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 10.0ms
Speed: 1.0ms preprocess, 10.0ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 10.2ms
Speed: 1.8ms preprocess, 10.2ms inference, 1.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 18.7ms
Speed: 0.0ms preprocess, 18.7ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 9.4ms
Speed: 1.4ms preprocess, 9.4ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 7.6ms
Speed: 0.5ms preprocess, 7.6ms inference, 0.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 9.3ms
Speed: 0.0ms preprocess, 9.3ms inferen