In [None]:
import cv2
import pandas as pd
from typing import List

# Define YOLO class labels (using COCO dataset classes as an example)
YOLO_LABELS = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork",
    "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
    "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant",
    "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote",
    "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
]

def create_annotated_video(
    df: pd.DataFrame, 
    labels_to_show: List[int],
    input_video_path: str,
    output_video_path: str,
    confidence_threshold: float = 0.3
):
    """
    Creates an annotated video showing bounding boxes and labels for the specified detections.
    For detections with the "person" label, in the last 10 frames only the most centered detection 
    (as determined by the 'track_id') is annotated as "Reptilian 1.0" with a dark green bounding box.
    
    Args:
        df (pd.DataFrame): DataFrame with columns:
            - image_idx: index of the frame
            - label: class label (as integer corresponding to an index in YOLO_LABELS)
            - confidence: detection confidence
            - x, y: bounding box center (in pixel coordinates)
            - w, h: bounding box width and height
            - track_id: unique identifier for tracking a given object across frames.
        labels_to_show (List[int]): List of label indices to display in the video.
        input_video_path (str): Path to the input video file.
        output_video_path (str): Path to the output (annotated) video file.
        confidence_threshold (float): Only draw bounding boxes with confidence above this value.
    """
    
    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        print(f"Could not open video file: {input_video_path}")
        return

    # Video properties
    width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps    = cap.get(cv2.CAP_PROP_FPS)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    
    # Video writer setup
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    
    # Pre-filter DataFrame: use only detections above the confidence threshold and within specified labels.
    df_filtered = df[(df['confidence'] > confidence_threshold) & (df['label'].isin(labels_to_show))]
    grouped = df_filtered.groupby("image_idx")
    
    frame_idx = 0  
    last_frame = None  # to store the final annotated frame
    
    # Variable to hold the chosen track_id for "Reptilian"
    reptilian_track_id = None
    
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        
        # Optional extra break if frame_idx exceeds total_frames
        if total_frames > 0 and frame_idx >= total_frames:
            break

        # Get detections for the current frame (if any)
        if frame_idx in grouped.groups:
            frame_df = grouped.get_group(frame_idx)
        else:
            frame_df = pd.DataFrame(columns=df.columns)
        
        # In the last 10 frames, if we haven't chosen a reptilian track yet, pick the most centered "person"
        if total_frames > 0 and frame_idx >= total_frames - 10:
            if reptilian_track_id is None:
                person_detections = frame_df[frame_df['label'] == 0]
                if not person_detections.empty:
                    frame_center_x = frame.shape[1] // 2
                    frame_center_y = frame.shape[0] // 2
                    min_dist = float('inf')
                    for _, row in person_detections.iterrows():
                        dist = ((row['x'] - frame_center_x) ** 2 + (row['y'] - frame_center_y) ** 2) ** 0.5
                        if dist < min_dist:
                            min_dist = dist
                            reptilian_track_id = row['track_id']
        
        # Process each detection in this frame
        for idx, row in frame_df.iterrows():
            label_idx = int(row['label'])
            conf = row['confidence']
            # Retrieve label name from YOLO_LABELS (if index valid)
            label_name = YOLO_LABELS[label_idx] if label_idx < len(YOLO_LABELS) else str(label_idx)
            
            # Default: use a hash-based color for each label
            color = ((label_idx * 37) % 256, (label_idx * 67) % 256, (label_idx * 97) % 256)
            text = f"{label_name} {conf:.2f}"
            
            # For the last 10 frames, if the detection is a "person" and its track_id matches reptilian_track_id, override the text and color.
            if total_frames > 0 and frame_idx >= total_frames - 10 and label_idx == 0:
                if reptilian_track_id is not None and row['track_id'] == reptilian_track_id:
                    text = "Reptilian 1.0"
                    color = (0, 128, 0)  # Dark green
            
            x_center = row['x']
            y_center = row['y']
            w_box = row['w']
            h_box = row['h']
            
            # Calculate bounding box corners
            x1 = int(x_center - w_box / 2)
            y1 = int(y_center - h_box / 2)
            x2 = int(x_center + w_box / 2)
            y2 = int(y_center + h_box / 2)
            
            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            
            # Prepare text with background rectangle for improved visibility
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_scale = 1
            thickness = 1
            (text_width, text_height), baseline = cv2.getTextSize(text, font, font_scale, thickness)
            y_label = max(0, y1 - 5)
            rect_x1 = max(x1, 0)
            rect_y1 = max(y_label - text_height - baseline, 0)
            rect_x2 = min(x1 + text_width, frame.shape[1])
            rect_y2 = min(y_label, frame.shape[0])
            
            cv2.rectangle(frame, (rect_x1, rect_y1), (rect_x2, rect_y2), color, cv2.FILLED)
            cv2.putText(frame, text, (rect_x1, rect_y2 - baseline), font, font_scale, (255, 255, 255), thickness, cv2.LINE_AA)
            
            
        out.write(frame)
        last_frame = frame.copy()
        frame_idx += 1
        
        # Optional: Allow manual exit (e.g., when processing live streams)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    out.release()
    print(f"Annotated video saved as: {output_video_path}")
    
    # Display the last annotated frame
    if last_frame is not None:
        cv2.imshow("Last Annotated Frame", last_frame)
        cv2.waitKey(0)
        cv2.destroyAllWindows()


if __name__ == "__main__":
    # Example usage: create a dummy DataFrame for demonstration.
    # In practice, replace this with your actual detections.
    columns = ["image_idx", "label", "confidence", "x", "y", "w", "h"]
    # Create dummy data for a 100-frame video; every frame has one detection.
    # For demonstration, label 0 ("person") is used for every frame.

    df = pd.read_csv("/home/wildmosquit0/workspace/data/images/Joni_reptilians/track/results.csv")
    
    # Specify which labels to show (here, only label 0 corresponding to "person")
    labels_to_show = list(range(len(YOLO_LABELS)))
    
    # Replace these paths with your actual video paths.
    input_video_path = "/home/wildmosquit0/workspace/data/images/Joni_reptilians/baylor.mp4"
    output_video_path = "/home/wildmosquit0/workspace/data/images/Joni_reptilians/rept.mp4"
    
    create_annotated_video(df, labels_to_show, input_video_path, output_video_path, confidence_threshold=0.3)

Annotated video saved as: /home/wildmosquit0/workspace/data/images/Joni_reptilians/rept.mp4


In [None]:
import pandas as pd

# First load the CSV file into a DataFrame


# Then call your function, passing the actual DataFrame (df)
create_annotated_video(
    df,                       # <-- pass the DataFrame, not the CSV path
    labels_to_show=YOLO_LABELS,                        # class_label
    input_video_path = "/home/wildmosquit0/workspace/data/images/Joni_reptilians/baylor.mp4", 
    output_video_path = "/home/wildmosquit0/workspace/data/images/Joni_reptilians/rept.mp4"
)


Annotated video saved as: /home/wildmosquit0/workspace/data/images/Joni_reptilians/rept.mp4


In [1]:
YOLO_LABELS = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork",
    "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
    "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant",
    "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote",
    "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
]

In [2]:
YOLO_LABELS.unique()

AttributeError: 'list' object has no attribute 'unique'