In [1]:
# traffic_light_detection.py
import cv2
from ultralytics import YOLO
import os
from screeninfo import get_monitors

In [2]:
# Get screen resolution
screen = get_monitors()[0]  # Get the primary monitor
screen_width, screen_height = screen.width, screen.height

In [3]:
# Load YOLO model for traffic light detection
model = YOLO('yolov8n.pt') 

In [4]:
# Load the video
video_path = r"..\Resources\Videos\Real_life_test_daylight_4.mp4"
#video_path = r"E:\Education\Projects\Machine Learning\Computer Vision\MSD Models\Resources\Videos\5514514-uhd_3840_2160_24fps.mp4"
#video_path = r"E:\Education\Projects\Machine Learning\Computer Vision\MSD Models\Resources\Videos\6272904-uhd_2160_3840_24fps.mp4"
#video_path = r"E:\Education\Projects\Machine Learning\Computer Vision\MSD Models\Resources\Videos\21118-315137091_medium.mp4"
output_dir = r"..\..\traffic_light_data\test"
# Ensure output directory exists
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

In [5]:
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error: Cannot open video file!")
    exit()

In [6]:
fps = cap.get(cv2.CAP_PROP_FPS)
frame_count = 0
saved_count = 0

cv2.namedWindow("Video", cv2.WINDOW_NORMAL)  # Create a resizable window

while True:
    ret, frame = cap.read()
    if not ret:
        print("End of video or cannot access the video.")
        break

    frame_count += 1
    # Process every frame or skip to reduce processing
    if frame_count % int(fps) != 0:  # Process one frame per second
        continue

    # YOLO detection
    results = model(frame)  # Run the YOLO model on the current frame
    
    # Access the first result in the list
    result = results[0]
    
    # Iterate through detected boxes
    for box in result.boxes:
        class_id = int(box.cls)  # Object class ID
        if class_id == 9:  # class 9 corresponds to traffic lights in YOLO
            # Extract bounding box coordinates
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # Bounding box coordinates
            traffic_light_roi = frame[y1:y2, x1:x2]  # Crop the traffic light region
    
            # Save the frame with a timestamp or unique name
            save_path = f"{output_dir}/traffic_light_frame{frame_count}.png"
            cv2.imwrite(save_path, traffic_light_roi)
            print(f"Saved ROI: {save_path}")
            saved_count += 1
            break  # Avoid multiple saves for the same frame
            
    # Resize frame to fit screen size
    frame_height, frame_width = frame.shape[:2]
    aspect_ratio = frame_width / frame_height

    # Calculate new dimensions while maintaining aspect ratio
    if frame_width > screen_width or frame_height > screen_height:
        if frame_width / screen_width > frame_height / screen_height:
            new_width = screen_width
            new_height = int(screen_width / aspect_ratio)
        else:
            new_height = screen_height
            new_width = int(screen_height * aspect_ratio)
    else:
        new_width, new_height = frame_width, frame_height

    resized_frame = cv2.resize(frame, (new_width, new_height))
    cv2.imshow("Video", resized_frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
        break

print(f"Total frames saved: {saved_count}")
cap.release()
cv2.destroyAllWindows()


0: 384x640 11 cars, 2 trucks, 123.1ms
Speed: 8.8ms preprocess, 123.1ms inference, 13.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 cars, 1 truck, 45.8ms
Speed: 3.0ms preprocess, 45.8ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 cars, 2 trucks, 44.8ms
Speed: 2.0ms preprocess, 44.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 cars, 2 trucks, 49.4ms
Speed: 2.0ms preprocess, 49.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 9 cars, 2 trucks, 47.9ms
Speed: 2.6ms preprocess, 47.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 9 cars, 2 trucks, 43.4ms
Speed: 2.6ms preprocess, 43.4ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 10 cars, 1 truck, 46.3ms
Speed: 1.0ms preprocess, 46.3ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 8 cars, 2 truc