In [6]:

# Two corner points in pixel coordinates.
# NOTE(review): presumably the region-of-interest corners that end up in
# `roi_coordinates` as (X1, Y1, X2, Y2) -- confirm against the cell that builds it.
X1 = 25
Y1 = 500
X2 = 2125
Y2 = 2000

In [8]:
# Function to apply K-means clustering segmentation and create an inverse mask
def kmeans_segmentation(roi_frame, k=2, threshold=128, kernel_size=9,
                        erode_iterations=2, dilate_iterations=5):
    """Quantize a 3-channel image with K-means and return a cleaned inverse binary mask.

    The image is reduced to ``k`` colors, converted to grayscale, thresholded
    with ``THRESH_BINARY_INV`` and then eroded and dilated with a square kernel.

    Args:
        roi_frame: Image array whose last axis has 3 channels (it is reshaped
            to ``(-1, 3)`` and later treated as BGR).
        k: Number of K-means clusters.
        threshold: Gray level used for the inverse threshold; pixels at or
            below it become 255, brighter pixels become 0.
        kernel_size: Side length of the square structuring element.
        erode_iterations: Number of erosion passes (removes small specks).
        dilate_iterations: Number of dilation passes applied after erosion.

    Returns:
        Single-channel uint8 mask with the same height and width as ``roi_frame``.
    """
    # One row per pixel; cv2.kmeans requires float32 samples.
    pixel_values = np.float32(roi_frame.reshape((-1, 3)))

    # Stop after 100 iterations or once the centers move by less than 0.2.
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.2)

    # 10 attempts with random initial centers; the best clustering is kept.
    _, labels, centers = cv2.kmeans(pixel_values, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

    # Replace every pixel by its (8-bit) cluster center and restore the image shape.
    centers = np.uint8(centers)
    segmented_roi = centers[labels.flatten()].reshape(roi_frame.shape)

    # Grayscale version of the quantized image, then inverse binary threshold.
    gray_roi = cv2.cvtColor(segmented_roi, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray_roi, threshold, 255, cv2.THRESH_BINARY_INV)

    # Erosion followed by dilation with the same square kernel.
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    eroded_mask = cv2.erode(mask, kernel, iterations=erode_iterations)
    dilated_mask = cv2.dilate(eroded_mask, kernel, iterations=dilate_iterations)

    return dilated_mask

In [9]:
def area_calculated(frame, width, height, resize_factor, X1, Y1, X2, Y2):
    """Downscale ``frame`` and replace its ROI with the K-means segmentation mask.

    Args:
        frame: Source image; it is resized into a new array.
        width, height: Size that is divided by ``resize_factor`` to get the
            output size.
        resize_factor: Divisor applied to the size and to the ROI corners.
        X1, Y1, X2, Y2: ROI corners in the coordinates of the un-resized frame.

    Returns:
        The resized frame. When the scaled ROI is non-empty, that region holds
        the 3-channel version of the mask from ``kmeans_segmentation``;
        otherwise the resized frame is returned unchanged after printing an
        error.
    """
    target_size = (int(width / resize_factor), int(height / resize_factor))
    canvas = cv2.resize(frame, target_size)
    canvas_h, canvas_w = canvas.shape[0], canvas.shape[1]

    # Scale the ROI corners and clamp them to the resized image.
    left = max(0, int(X1 / resize_factor))
    top = max(0, int(Y1 / resize_factor))
    right = min(canvas_w, int(X2 / resize_factor))
    bottom = min(canvas_h, int(Y2 / resize_factor))

    rows = slice(top, bottom)
    cols = slice(left, right)
    roi_frame = canvas[rows, cols]

    # Nothing to segment: report and hand back the plain resized frame.
    if roi_frame.size == 0:
        roi_box = (left, top, right, bottom)
        print(f"Error: Empty ROI with coordinates {roi_box}")
        return canvas

    # Segment the ROI, expand the mask to 3 channels and paste it in place.
    mask = kmeans_segmentation(roi_frame, k=2)
    canvas[rows, cols] = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

    return canvas

In [10]:
def model_predict(frame, model, width, height, resize_factor, X1, Y1, X2, Y2):
    """Run ``model`` on the ROI of ``frame``, paste the rendered result back and resize.

    ``frame`` is modified in place: the ROI is overwritten with the plotted
    prediction and a black rectangle is drawn around it.

    Args:
        frame: Full-resolution image (modified in place).
        model: Object exposing ``predict(source=...)`` whose first result has a
            ``plot()`` method returning an image of the ROI's shape.
            NOTE(review): presumably an Ultralytics YOLO model -- confirm.
        width, height: Size that is divided by ``resize_factor`` to get the
            output size.
        resize_factor: Divisor for the output size; may be non-integer.
        X1, Y1, X2, Y2: ROI corners in ``frame`` coordinates.

    Returns:
        The annotated frame resized to
        ``(int(width / resize_factor), int(height / resize_factor))``.
    """
    # Clamp the ROI to the frame so negative corners cannot wrap around.
    frame_h, frame_w = frame.shape[0], frame.shape[1]
    roi_x1, roi_y1 = max(0, X1), max(0, Y1)
    roi_x2, roi_y2 = min(frame_w, X2), min(frame_h, Y2)

    # Crop BEFORE drawing the rectangle: the 20px border is centred on the ROI
    # edge, so drawing first would put black pixels into the model input.
    roi_frame = frame[roi_y1:roi_y2, roi_x1:roi_x2]

    if roi_frame.size == 0:
        # Same policy as area_calculated: report and skip the prediction.
        print(f"Error: Empty ROI with coordinates {(roi_x1, roi_y1, roi_x2, roi_y2)}")
    else:
        results = model.predict(source=roi_frame)

        # Rendered prediction (boxes / masks drawn on the ROI image).
        segmented_frame = results[0].plot()

        # Place the rendered ROI back into the full frame.
        frame[roi_y1:roi_y2, roi_x1:roi_x2] = segmented_frame

    # Draw the ROI outline on the full frame (black, 20px thick).
    cv2.rectangle(frame, (X1, Y1), (X2, Y2), (0, 0, 0), 20)

    # int(...) keeps the size integral for non-integer factors and matches the
    # size computed in area_calculated, so both outputs can be stacked.
    target_size = (int(width / resize_factor), int(height / resize_factor))
    resized_frame = cv2.resize(frame, target_size)

    return resized_frame

In [None]:
def main(video_path,model, resize_factor, roi_coordinates):
    """Play a video showing the model prediction and the K-means mask side by side.

    For every frame, the prediction view (``model_predict``) and the
    segmentation view (``area_calculated``) are stacked horizontally and shown
    in a window. Press ``q`` in the window to stop.

    Args:
        video_path: Source passed to ``cv2.VideoCapture``.
        model: Prediction model forwarded to ``model_predict``.
        resize_factor: Divisor applied to the frame size for display.
        roi_coordinates: Sequence unpacked as ``(X1, Y1, X2, Y2)``.

    Returns:
        None.
    """
    X1, Y1, X2, Y2 = roi_coordinates
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print("Error: Could not open video.")
        return

    # try/finally so the capture and the window are released even when the
    # loop is interrupted (e.g. KeyboardInterrupt) or a frame fails to process.
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Loop through the video frames
        while cap.isOpened():
            ret, frame = cap.read()

            # Break the loop if no more frames (also reached at end of video)
            if not ret:
                print("Error: Could not read frame.")
                break

            # area_calculated must run first: model_predict draws on `frame` in place.
            processed_frame = area_calculated(frame, width, height, resize_factor, X1, Y1, X2, Y2)
            predicted_frame = model_predict(frame, model, width, height, resize_factor, X1, Y1, X2, Y2)

            collage = np.hstack((predicted_frame, processed_frame))

            cv2.imshow('Collage Frame', collage)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

In [12]:
# Run the side-by-side viewer; press 'q' in the display window to stop.
# video_path, model, resize_factor and roi_coordinates are not defined in the
# cells shown here -- they must already exist from earlier cells.
main(video_path, model, resize_factor, roi_coordinates)


0: 480x640 1 Rotten Potato, 193.6ms
Speed: 10.4ms preprocess, 193.6ms inference, 56.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 Rotten Potato, 35.0ms
Speed: 4.8ms preprocess, 35.0ms inference, 5.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 Rotten Potato, 150.5ms
Speed: 3.1ms preprocess, 150.5ms inference, 4.6ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 Rotten Potato, 35.0ms
Speed: 5.5ms preprocess, 35.0ms inference, 6.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 Rotten Potatos, 33.7ms
Speed: 8.4ms preprocess, 33.7ms inference, 6.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 Rotten Potatos, 38.0ms
Speed: 1.5ms preprocess, 38.0ms inference, 5.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 Rotten Potato, 62.6ms
Speed: 5.5ms preprocess, 62.6ms inference, 9.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 Rotten Potato, 77.8ms
Speed: 0.0ms preprocess, 

KeyboardInterrupt: 