In [None]:
import cv2
from ultralytics import YOLO
import numpy as np
from pathlib import Path

def _draw_detections(frame, results, class_names, box_color, text_color,
                     allowed_classes=None):
    """Draw labelled bounding boxes for one model's results onto ``frame``.

    The frame is modified in place.

    Args:
        frame: BGR image to draw on.
        results: Iterable of result objects returned by calling a YOLO model;
            each exposes ``.boxes`` whose items provide ``xyxy``, ``conf``
            and ``cls``.
        class_names: Mapping from class ID to display name.
        box_color: BGR colour for the box outline and label background.
        text_color: BGR colour for the label text.
        allowed_classes: Optional container of class IDs to keep; detections
            of any other class are skipped. ``None`` keeps everything.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    for result in results:
        for box in result.boxes:
            cls = int(box.cls[0])
            if allowed_classes is not None and cls not in allowed_classes:
                continue

            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].cpu().numpy())
            conf = float(box.conf[0])
            label = f"{class_names[cls]}: {conf:.2f}"

            cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)

            (label_width, label_height), _ = cv2.getTextSize(
                label, font, 0.5, 1
            )
            # The label normally sits just above the box. When the box touches
            # the top of the frame that would place it at negative y (off
            # screen), so push it down far enough to stay visible.
            label_bottom = max(y1, label_height + 10)
            cv2.rectangle(
                frame,
                (x1, label_bottom - label_height - 10),
                (x1 + label_width, label_bottom),
                box_color,
                -1,
            )
            cv2.putText(
                frame, label, (x1, label_bottom - 5), font, 0.5, text_color, 1
            )


def process_video_with_dual_models(
    video_path,
    model1_path,
    model2_path,
    output_path="output_combined_all_again.mp4",
    conf_threshold=0.25,
    model1_classes=None
):
    """
    Process video with two YOLO models and draw bounding boxes from both.

    Every frame is run through both models; model 1 detections are drawn in
    green with black label text, model 2 detections in blue with white label
    text, and the annotated frame is written to ``output_path``.

    Args:
        video_path: Path to input .MOV video (str or path-like)
        model1_path: Path to first YOLO model (.pt file)
        model2_path: Path to second YOLO model (.pt file)
        output_path: Path for output video (str or path-like)
        conf_threshold: Confidence threshold for detections
        model1_classes: List of class IDs to detect for model1 (None = all classes)

    Raises:
        ValueError: If the input video cannot be opened, or the output video
            cannot be opened for writing.
    """

    # Load both models
    print("Loading models...")
    model1 = YOLO(model1_path)
    model2 = YOLO(model2_path)

    # Get class names for both models
    model1_classes_dict = model1.names
    model2_classes_dict = model2.names

    print(f"Model 1 classes: {model1_classes_dict}")
    print(f"Model 2 classes: {model2_classes_dict}")

    # Build the class filter once; a set gives O(1) membership per detection.
    allowed_model1 = None if model1_classes is None else set(model1_classes)

    # Different colors for different models (BGR format)
    color_model1 = (0, 255, 0)    # Green for model 1
    color_model2 = (255, 0, 0)    # Blue for model 2

    # str() lets callers pass pathlib.Path objects as well as plain strings.
    cap = cv2.VideoCapture(str(video_path))
    out = None
    frame_count = 0

    # try/finally guarantees the capture and writer are released even when
    # inference or drawing raises part-way through the video.
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path}")

        # Get video properties
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep FPS as a float: truncating e.g. 29.97 to 29 makes the output
        # play at the wrong speed relative to the source.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some containers report 0 (or NaN); fall back to a sane default
            # so the writer still gets a valid rate.
            fps = 30.0
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        print(
            f"Video properties: {width}x{height} @ {fps:.2f}fps, "
            f"{total_frames} frames"
        )

        # Define codec and create VideoWriter
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(str(output_path), fourcc, fps, (width, height))
        if not out.isOpened():
            # Without this check every write() would silently be dropped.
            raise ValueError(
                f"Could not open output video for writing: {output_path}"
            )

        print("Processing video...")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            if frame_count % 30 == 0:  # Progress update every 30 frames
                print(f"Processing frame {frame_count}/{total_frames}")

            # Run inference with both models on the clean frame before any
            # drawing, so model 2 never sees model 1's annotations.
            results1 = model1(frame, conf=conf_threshold, verbose=False)
            results2 = model2(frame, conf=conf_threshold, verbose=False)

            _draw_detections(
                frame, results1, model1_classes_dict,
                color_model1, (0, 0, 0), allowed_model1,
            )
            _draw_detections(
                frame, results2, model2_classes_dict,
                color_model2, (255, 255, 255),
            )

            # Write the frame
            out.write(frame)
    finally:
        # Release everything
        cap.release()
        if out is not None:
            out.release()

    print("\nProcessing complete!")
    print(f"Output saved to: {output_path}")
    print(f"Total frames processed: {frame_count}")



if __name__ == "__main__":
    # --- Detection settings -------------------------------------------------
    CONFIDENCE_THRESHOLD = 0.25

    # Only these model-1 class IDs are drawn. With the stock yolov8n.pt names
    # these are: person, bicycle, car, motorcycle, bus, truck, traffic light,
    # stop sign.
    MODEL1_CLASSES = [0, 1, 2, 3, 5, 7, 9, 11]

    # --- Input / output locations (edit to match your files) ----------------
    VIDEO_PATH = "IMG_6589.MOV"
    OUTPUT_PATH = "video_processed.mp4"
    MODEL1_PATH = "yolov8n.pt"
    MODEL2_PATH = "traffic_lights_and_signs_real_car.pt"

    # Annotate the video with detections from both models.
    process_video_with_dual_models(
        VIDEO_PATH,
        MODEL1_PATH,
        MODEL2_PATH,
        output_path=OUTPUT_PATH,
        conf_threshold=CONFIDENCE_THRESHOLD,
        model1_classes=MODEL1_CLASSES,
    )

Loading models...
Model 1 classes: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keybo