# Object Detection - Input Video

In [3]:
# Prerequisite (run once, in the kernel's environment): %pip install ultralytics

In [5]:
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
import os
import time


In [6]:
# Load the pre-trained YOLOv8 model from the "yolov8n.pt" weights file.
# `model` is reused by the detection cell further down.
model = YOLO("yolov8n.pt")

In [7]:
# List the sample images and videos available in the YOLO data directory.
!ls '/media/akashs/FA22E72622E6E69B/DATA1_disk/ACADEMICS/3_IPCS_Works/data/yolo/'

classroom.jpg  ship.jpg       traffic2.jpg   traffic.jpg
face.jpg       traffic_1.mp4  traffic_2.mp4


In [9]:


# Run YOLO object detection on every frame of an input video file, draw the
# detections, show the annotated frames in a window, and record them to an
# .avi file. Press 's' to save a snapshot of the current frame, 'q' to quit.
#
# NOTE: the user-facing messages and the window title still say "Webcam"
# although the source is a video file; they are left unchanged so they match
# the recorded output of this cell.
video_path = '/media/akashs/FA22E72622E6E69B/DATA1_disk/ACADEMICS/3_IPCS_Works/data/yolo/traffic_1.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail early: otherwise width/height read back as 0 and an empty,
    # unplayable output file is created without any error.
    cap.release()
    raise IOError(f"Could not open video source: {video_path}")

fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video_filename = f"recorded_{time.strftime('%Y%m%d_%H%M%S')}.avi"
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Record at the source frame rate so the output plays at the original speed;
# fall back to 20.0 when the container does not report a usable value.
source_fps = cap.get(cv2.CAP_PROP_FPS)
output_fps = source_fps if source_fps > 0 else 20.0
out = cv2.VideoWriter(output_video_filename, fourcc, output_fps, (frame_width, frame_height))

img_counter = 0  # number of snapshots saved during this run
names = model.names  # class-id -> class-name mapping; constant across frames
print("Webcam started. Press 's' to save image, 'q' to quit.")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # For a video file this is also reached at the end of the stream.
            print("Webcam frame not received")
            break

        # -----------------------------
        # Run detection on the current frame
        # -----------------------------
        results = model(frame)
        boxes = results[0].boxes

        # -----------------------------
        # Draw each detection (box + "class: confidence" label) on the frame
        # -----------------------------
        for box in boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])  # bounding box corners
            conf = box.conf[0].item()
            cls = int(box.cls[0].item())
            label = f"{names[cls]}: {conf:.2f}"

            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            # Keep the label baseline inside the frame for boxes that touch
            # the top edge (y1 - 5 would otherwise be drawn off-screen).
            label_y = max(y1 - 5, 15)
            cv2.putText(frame, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX,
                        0.6, (255, 0, 0), 2)

            print(f"游릭 Detected: {label} at ({x1}, {y1}, {x2}, {y2})")

        # -----------------------------
        # Save annotated frame to the output video
        # -----------------------------
        out.write(frame)

        # -----------------------------
        # Show annotated frame
        # -----------------------------
        cv2.imshow('Webcam Feed with Drawing', frame)

        # -----------------------------
        # Keypress handling
        # -----------------------------
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            print("Exiting webcam...")
            break
        elif key == ord('s'):
            # Save snapshot image of the annotated frame
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            img_filename = f"snapshot_{timestamp}.jpg"
            cv2.imwrite(img_filename, frame)
            print(f"Snapshot saved: {img_filename}")
            img_counter += 1
finally:
    # -----------------------------
    # Release resources even if detection raises or the kernel is
    # interrupted, so the output file is finalized and the window closes.
    # -----------------------------
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print(f"Recorded video saved as: {output_video_filename}")


Webcam started. Press 's' to save image, 'q' to quit.

0: 384x640 4 persons, 10 cars, 2 buss, 4 trucks, 1 traffic light, 192.2ms
Speed: 15.4ms preprocess, 192.2ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)
游릭 Detected: bus: 0.95 at (1546, 858, 1936, 1449)
游릭 Detected: car: 0.86 at (404, 1496, 948, 1926)
游릭 Detected: car: 0.86 at (70, 1591, 636, 2137)
游릭 Detected: person: 0.85 at (2208, 1529, 2413, 2034)
游릭 Detected: car: 0.83 at (3, 1681, 437, 2145)
游릭 Detected: person: 0.63 at (2115, 1203, 2216, 1550)
游릭 Detected: car: 0.60 at (643, 1403, 1044, 1701)
游릭 Detected: truck: 0.54 at (854, 1242, 1291, 1584)
游릭 Detected: person: 0.54 at (2237, 1392, 2310, 1565)
游릭 Detected: bus: 0.49 at (1976, 928, 2385, 1491)
游릭 Detected: car: 0.45 at (0, 1440, 153, 1639)
游릭 Detected: car: 0.38 at (411, 1470, 966, 1737)
游릭 Detected: car: 0.36 at (1217, 1227, 1445, 1430)
游릭 Detected: car: 0.30 at (445, 1250, 791, 1447)
游릭 Detected: traffic light: 0.30 at (500, 986, 545, 1139)
游릭 Detecte