<a href="https://colab.research.google.com/github/bhattacharyyad/Scopus_Data/blob/master/Detect_Track_22.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ultralytics
import cv2
from ultralytics import YOLO
from PIL import Image
import numpy as np
from google.colab.patches import cv2_imshow # Import cv2_imshow for Colab display
import time

# Load the YOLO model (weights file 'yolov8n.pt'); process_video() reads this
# module-level instance.
model = YOLO('yolov8n.pt')

# Label dictionary: class index -> human-readable name for indices 0..79.
# NOTE(review): presumably mirrors the class list of the loaded weights;
# model.names is the authoritative mapping if a different model is used.
label_dict = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign',
         12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}

# --- Removed tkinter GUI setup ---
# The interactive label selection and confidence slider are gone; the fixed
# defaults below are used instead.
# Only detections whose label is in this list are drawn.
selected_labels = ['person', 'car', 'bicycle'] # Example: default selected labels
# Detections with confidence below this value are ignored.
conf_threshold = 0.5 # Example: default confidence threshold

# List of (x, y) polygon vertices, in the coordinates of the processed frame.
# Without the GUI it cannot be edited interactively; fill it in by hand to
# restrict drawing to detections whose box centre lies inside the polygon.
# An empty list disables the polygon filter.
polygon_points = []

def _fit_within(width, height, max_width, max_height):
    """Return ``(width, height)`` shrunk to fit the bounds, keeping aspect ratio.

    Sizes that already fit are returned unchanged (frames are never upscaled).
    Each returned dimension is at least 1 pixel.
    """
    if width <= max_width and height <= max_height:
        return width, height
    scale = min(max_width / width, max_height / height)
    return max(1, int(width * scale)), max(1, int(height * scale))


def process_video():
    """Run YOLO detection on ``room-video2.mp4`` and save an annotated copy.

    Every frame is downscaled to fit within 600x400, passed through the
    module-level ``model``, and annotated with a box and label for each
    detection whose label is in ``selected_labels`` and whose confidence is at
    least ``conf_threshold``. If ``polygon_points`` is non-empty, only
    detections whose box centre lies inside (or on) that polygon are drawn,
    and the polygon itself is outlined. Each annotated frame is displayed with
    ``cv2_imshow`` and appended to ``output.mp4``.

    Returns:
        None. Prints an error and returns early if the input cannot be opened.
    """
    # Define maximum width and height of the processed frames
    max_width = 600
    max_height = 400

    # Make sure 'room-video2.mp4' is available in your Colab environment
    cap = cv2.VideoCapture("room-video2.mp4")

    if not cap.isOpened():
        print("Error: Could not open video file.")
        return

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        # Some containers report 0 (or NaN); the writer needs a positive rate.
        fps = 30.0

    # The polygon does not change while the video runs, so convert it once.
    polygon_pts = None
    if polygon_points:
        polygon_pts = np.array(polygon_points, np.int32).reshape((-1, 1, 2))

    out = None
    frames_written = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Resize frame if it exceeds maximum width or height
            height, width = frame.shape[:2]
            target_size = _fit_within(width, height, max_width, max_height)
            if target_size != (width, height):
                frame = cv2.resize(frame, target_size)

            # Run YOLO detection
            results = model(frame)

            # Filter results based on selected labels and confidence score
            for result in results:
                boxes = result.boxes
                detections = zip(
                    boxes.xyxy.tolist(), boxes.cls.tolist(), boxes.conf.tolist()
                )
                for box, class_id, conf in detections:
                    label = label_dict.get(int(class_id))
                    if label not in selected_labels or conf < conf_threshold:
                        continue

                    x1, y1, x2, y2 = map(int, box)
                    center = ((x1 + x2) // 2, (y1 + y2) // 2)

                    # Skip detections whose centre falls outside the polygon
                    # (pointPolygonTest returns a negative value for "outside").
                    if (polygon_pts is not None
                            and cv2.pointPolygonTest(polygon_pts, center, False) < 0):
                        continue

                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 255), 2)
                    cv2.putText(frame, label, (x2, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)

            # Draw the polygon on the frame (if defined)
            if polygon_pts is not None and len(polygon_points) > 1:
                cv2.polylines(frame, [polygon_pts], isClosed=True,
                              color=(0, 255, 0), thickness=2)

            # Open the writer lazily with the size of the frames actually
            # written: a VideoWriter only accepts frames matching the size it
            # was created with, and the frames here may have been downscaled.
            if out is None:
                out = cv2.VideoWriter('output.mp4', fourcc, fps, target_size)
                if not out.isOpened():
                    print("Warning: Could not open output.mp4 for writing; "
                          "frames will only be displayed.")

            # Display the frame in Colab. cv2_imshow renders inline, so there
            # is no key handling; interrupt the cell to stop early.
            cv2_imshow(frame)
            if out.isOpened():
                out.write(frame)  # Write the frame to the output video
                frames_written += 1
    finally:
        # Release handles even if detection or display raises, so the output
        # container is finalised and the input file is closed.
        cap.release()
        if out is not None:
            out.release()

    if frames_written:
        print("Video processing finished. Output saved to output.mp4")
    else:
        print("Video processing finished. No frames were written.")

# --- Removed tkinter event bindings and mainloop ---
# There is no GUI loop any more: the whole video is processed once, right
# here, when this cell is executed.
process_video()


In [None]:
!git add .

In [None]:
!ls /content/

In [None]:
# YOLO from ultralytics (object detection & tracking)
!pip install ultralytics        # https://pypi.org/project/ultralytics/

# OpenCV for video/image processing
!pip install opencv-python      # https://pypi.org/project/opencv-python/

# PyTorch CPU version (works without GPU)
# !pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# GPU build (CUDA 12.1). If you have no GPU, comment out the next line and uncomment the CPU line above:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121


In [None]:
from ultralytics import YOLO
import cv2
import time
import torch
import subprocess
import shutil
import os

# Report the installed PyTorch build and whether a CUDA device is visible.
# The trailing comments record the author's sample output, not guaranteed values.
print(torch.__version__) # my version --> 2.2.0+cu121
print(torch.cuda.is_available()) # True (GPU is available)


In [None]:
# Notebook-level YOLO instance. Note that main() below constructs its own
# YOLO('yolov8n.pt') and does not use this one.
model = YOLO('yolov8n.pt')

In [None]:
def main(tracker_choice='bytetrack'):
    """Track objects in ``flight1.mp4`` with YOLOv8 and display annotated frames.

    Args:
        tracker_choice: ``'bytetrack'`` or ``'botsort'``. Any other value
            prints a notice and falls back to ``'bytetrack'``.

    Returns:
        None. Prints an error and returns early if the video cannot be opened.

    Each frame is annotated with one box per tracked detection (class name,
    track ID, confidence), the tracker name, and the measured processing FPS,
    then rendered inline with Colab's ``cv2_imshow``.
    """
    input_path = 'flight1.mp4'  # Your video file path

    # Validate tracker choice
    if tracker_choice not in ('botsort', 'bytetrack'):
        print(f"Invalid tracker choice '{tracker_choice}', defaulting to 'bytetrack'")
        tracker_choice = 'bytetrack'

    # Load the pretrained YOLOv8 model
    model = YOLO('yolov8n.pt')

    # Check if CUDA is available and set device accordingly
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    print(f"Using device: {device}")

    # Open the file only to validate it and read its nominal FPS; model.track()
    # reads the video itself, so the handle is released straight away.
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error opening video file {input_path}")
        return
    input_fps = cap.get(cv2.CAP_PROP_FPS)
    cap.release()

    # Imported once here rather than on every frame of the loop below.
    from google.colab.patches import cv2_imshow

    # Parameters of the track method:
    #   source:  path to video file or camera index
    #   tracker: tracker configuration file ('bytetrack.yaml' or 'botsort.yaml')
    #   conf:    confidence threshold for detections
    #   stream:  if True, yields results frame by frame;
    #            if False, processes the entire video at once
    results = model.track(
        source=input_path,
        tracker=f'{tracker_choice}.yaml',
        conf=0.3,
        stream=True,
    )

    # Timestamp of the previous frame; None until the first frame is seen.
    prev_time = None

    for frame_result in results:
        # Work on a copy so the result's original frame stays untouched
        img = frame_result.orig_img.copy()

        # Measure FPS with a monotonic clock. Fall back to the file's nominal
        # FPS for the first frame, or if no measurable time has elapsed
        # (which would otherwise divide by zero).
        curr_time = time.perf_counter()
        elapsed = curr_time - prev_time if prev_time is not None else 0.0
        fps = 1 / elapsed if elapsed > 0 else input_fps
        prev_time = curr_time

        # Draw every detected box on the frame
        for box in frame_result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            conf = box.conf.item()
            cls = int(box.cls.item())
            # box.id is None when no track ID is attached to the detection
            track_id = int(box.id.item()) if box.id is not None else -1

            # model.names maps class indices to class names,
            # e.g. {0: 'person', 1: 'bicycle', 2: 'car', ...}
            class_name = model.names[cls]

            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            label = f"{class_name} ID:{track_id} {conf:.2f}"
            cv2.putText(img, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display tracker name and FPS on top-left corner
        cv2.putText(img, f"Tracker: {tracker_choice}", (10, 70),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3)
        cv2.putText(img, f"FPS: {fps:.2f}", (10, 150),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 3)

        # cv2_imshow renders inline and creates no HighGUI window, so
        # cv2.waitKey() can never receive a key press here. To stop early,
        # interrupt the cell execution.
        cv2_imshow(img)

if __name__ == '__main__':
    # Two trackers are available:
    #   1. ByteTrack (bytetrack.yaml)
    #   2. BoT-SORT  (botsort.yaml)
    main(tracker_choice='bytetrack')