In [18]:
import torch
print("CUDA available:", torch.cuda.is_available())

CUDA available: True


In [19]:
from ultralytics import YOLO
import cv2
import os

# Create the output directory if it doesn't exist
output_dir = "../outputs/tracked_vehicles"
os.makedirs(output_dir, exist_ok=True)

# Load YOLO models (assuming class 2 corresponds to cars)
detection_model = YOLO("../yolo_weights/yolo12x.pt")
segmentation_model = YOLO("../yolo_weights/yolo11x-seg.pt")

# Define a vertical line at x = 400 pixels from the left
line_x = 400

# Run inference in stream mode
results = detection_model.track(
    source="https://www.youtube.com/watch?v=K6xsEng2PhU", 
    stream_buffer=True,
    stream=True, 
    persist=True, 
    vid_stride=1, 
    classes=[2]
)

# Set to store IDs of cars already captured
captured_ids = set()
frame_id = 0  # Counter for saved frames

# Process each frame from the stream
for result in results:
    # Get the annotated frame with built-in detections
    annotated_frame = result.plot()  # image with bounding boxes & labels
    frame_height, frame_width = annotated_frame.shape[:2]

    # Draw the vertical line for reference
    cv2.line(annotated_frame, (line_x, 0), (line_x, frame_height), (0, 255, 0), 2)

    save_frame = False  # Flag to decide if this frame should be saved

    # Process each detected box (assumed to be in result.boxes.xyxy)
    if result.boxes is not None and len(result.boxes.xyxy) > 0:
        for i, box in enumerate(result.boxes.xyxy):
            # Get bounding box coordinates and compute the center
            if hasattr(box, "cpu"):
                coords = [int(coord) for coord in box.cpu().numpy()]
            else:
                coords = [int(coord) for coord in box]
            x1, y1, x2, y2 = coords
            center_x = int((x1 + x2) / 2)
            center_y = int((y1 + y2) / 2)

            # Annotate the center with a small blue circle
            cv2.circle(annotated_frame, (center_x, center_y), 5, (255, 0, 0), -1)

            # Optionally annotate with tracking ID (if available)
            track_id = None
            if hasattr(result.boxes, "id"):
                track_id = int(result.boxes.id[i])
                cv2.putText(annotated_frame, f"ID:{track_id}", (center_x - 10, center_y - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

            # Check if the car's center has crossed the vertical line
            # and if it hasn't been captured already.
            if center_x >= line_x and track_id is not None and track_id not in captured_ids:
                save_frame = True
                captured_ids.add(track_id)

    # Save the frame if a new car has crossed the line
    if save_frame:
        filename = os.path.join(output_dir, f"frame_{frame_id:05d}.jpg")
        cv2.imwrite(filename, annotated_frame)
        frame_id += 1

    # Display the frame in a window
    cv2.imshow("Vehicle Detection", annotated_frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

cv2.destroyAllWindows()



1/1: https://www.youtube.com/watch?v=K6xsEng2PhU... Success  (607 frames of shape 1920x1080 at 30.00 FPS)

0: 384x640 1 car, 27.1ms
0: 384x640 1 car, 25.3ms
0: 384x640 1 car, 23.9ms
0: 384x640 1 car, 23.7ms
0: 384x640 1 car, 23.9ms
0: 384x640 1 car, 23.4ms
0: 384x640 1 car, 23.6ms
0: 384x640 1 car, 24.1ms
0: 384x640 1 car, 24.3ms
0: 384x640 1 car, 24.4ms
0: 384x640 1 car, 21.9ms
0: 384x640 1 car, 22.4ms
0: 384x640 1 car, 21.8ms
0: 384x640 1 car, 22.3ms
0: 384x640 1 car, 21.6ms
0: 384x640 1 car, 23.8ms
0: 384x640 1 car, 21.4ms
0: 384x640 1 car, 22.2ms
0: 384x640 1 car, 22.0ms
0: 384x640 1 car, 22.7ms
0: 384x640 1 car, 22.4ms
0: 384x640 1 car, 21.3ms
0: 384x640 1 car, 22.1ms
0: 384x640 1 car, 21.7ms
0: 384x640 1 car, 21.2ms
0: 384x640 1 car, 22.1ms
0: 384x640 1 car, 22.5ms
0: 384x640 1 car, 21.5ms
0: 384x640 1 car, 21.3ms
0: 384x640 1 car, 21.4ms
0: 384x640 1 car, 21.4ms
0: 384x640 1 car, 21.7ms
0: 384x640 1 car, 21.3ms
0: 384x640 1 car, 21.0ms
0: 384x640 1 car, 21.4ms
0: 384x640 1 car,

TypeError: 'NoneType' object is not subscriptable




AttributeError: 'NoneType' object has no attribute 'isnumeric'

In [None]:
cap = cv2.VideoCapture()
# Define the x-coordinate of the black line (on the orange post)
black_line_x = 1090  # Adjust based on your specific video
# Define the x-coordinate of the blue line (400 pixels to the right)
blue_line_x = black_line_x + 400
# Tolerance (pixelwise)
tolerance = 5

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()
    if not success:
        break  # Break the loop if the end of the video is reached

    # Run YOLO inference on the frame, filtering for the 'boat' class (class index 8)
    results = detection_model.track(frame, classes=[8], persist=True)
    # Visualize the results on the frame
    annotated_frame = results[0].plot()

    # Iterate over detected objects
    for result in results:
        boxes = result.boxes
        for box in boxes:
            # Extract class ID and confidence
            class_id = int(box.cls)
            confidence = box.conf

            # Filter for boats with a confidence threshold
            if class_id == 8 and confidence > 0.2:
                # Extract bounding box coordinates
                xyxy = box.xyxy.cpu().numpy().flatten()
                x1, y1, x2, y2 = map(int, xyxy)

                # Calculate center of mass (centroid)
                center_x = (x1 + x2) // 2
                center_y = (y1 + y2) // 2

                # Draw center of mass
                cv2.circle(annotated_frame, (center_x, center_y), 5, (0, 255, 255), -1)

                # Check if the boat's center crosses the blue line
                if blue_line_x - tolerance <= center_x <= blue_line_x + tolerance:
                    # Use vessel and format ID properly in filename
                    vessel_id = int(box.id.cpu().numpy()) if box.id is not None else 0
                    filename = os.path.join(output_dir, f"vessel_id{vessel_id}_detected.png")
                    cv2.imwrite(filename, frame, [cv2.IMWRITE_PNG_COMPRESSION, 0])  # Save with no compression for highest quality
                    print(f"Frame saved: {filename}")
              
    # Draw the black vertical line on the orange post
    cv2.line(annotated_frame, (black_line_x, 0), (black_line_x, frame.shape[0]), (0, 0, 0), 10)
    # Draw the bright blue vertical line 400 pixels to the right
    cv2.line(annotated_frame, (blue_line_x, 0), (blue_line_x, frame.shape[0]), (255, 0, 0), 10)        
    # Display the annotated frame
    cv2.imshow("YOLO Inference", annotated_frame)
    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break

# Release the video capture object and close the display window
cap.release()
cv2.destroyAllWindows()



0: 384x640 4 boats, 195.1ms
Speed: 3.0ms preprocess, 195.1ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 boats, 200.2ms
Speed: 2.0ms preprocess, 200.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 boats, 210.1ms
Speed: 2.0ms preprocess, 210.1ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 boats, 211.0ms
Speed: 5.0ms preprocess, 211.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 boats, 205.0ms
Speed: 1.1ms preprocess, 205.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 boats, 187.5ms
Speed: 2.4ms preprocess, 187.5ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 boats, 194.4ms
Speed: 6.5ms preprocess, 194.4ms inference, 8.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 boats, 207.0ms
Speed: 0.0ms preprocess, 207.0ms inference, 0.0ms postprocess per image at shape (