In [1]:
%pip install ultralytics


Collecting ultralytics
  Downloading ultralytics-8.3.78-py3-none-any.whl.metadata (35 kB)
Collecting numpy<=2.1.1,>=1.23.0 (from ultralytics)
  Downloading numpy-2.1.1-cp313-cp313-win_amd64.whl.metadata (59 kB)
Collecting matplotlib>=3.3.0 (from ultralytics)
  Downloading matplotlib-3.10.0-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting opencv-python>=4.6.0 (from ultralytics)
  Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Collecting pillow>=7.1.2 (from ultralytics)
  Downloading pillow-11.1.0-cp313-cp313-win_amd64.whl.metadata (9.3 kB)
Collecting pyyaml>=5.3.1 (from ultralytics)
  Downloading PyYAML-6.0.2-cp313-cp313-win_amd64.whl.metadata (2.1 kB)
Collecting scipy>=1.4.1 (from ultralytics)
  Downloading scipy-1.15.2-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting torch>=1.8.0 (from ultralytics)
  Downloading torch-2.6.0-cp313-cp313-win_amd64.whl.metadata (28 kB)
Collecting torchvision>=0.9.0 (from ultralytics)
  Downloading torchvision-0.21.


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from ultralytics import YOLO
import cv2
from collections import defaultdict
import numpy as np
import time

In [3]:
# Load the model YOLOv8
model = YOLO('best_V2.pt')

### Utilização em Webcam

In [None]:
# Definir a fonte de vídeo como a webcam
VIDEO_SOURCE = cv2.VideoCapture(0)  # 0 para a webcam padrão

# Obter FPS da webcam (pode variar)
fps = VIDEO_SOURCE.get(cv2.CAP_PROP_FPS)
frame_time = 1 / fps if fps > 0 else 1 / 30  # Prevenir divisão por zero

time_tracker = defaultdict(lambda: 0.0)
total_detected_time = 0.0  # Tempo total de detecção

while True:
    ret, frame = VIDEO_SOURCE.read()
    if not ret:
        break

    # Fazer a previsão com YOLO
    results = model(frame)

    # Processar os resultados
    for result in results:
        bboxes = np.array(result.boxes.xyxy.cpu(), dtype="int")
        classes = np.array(result.boxes.cls.cpu(), dtype="int")
        confidence = np.array(result.boxes.conf.cpu(), dtype="float")

        detected_classes = set()

        for cls, bbox, conf in zip(classes, bboxes, confidence):
            if conf < 0.5:  # Filtrar detecções com baixa confiança
                continue

            object_name = model.names[cls]
            detected_classes.add(object_name)
            time_tracker[object_name] += frame_time

            # Desenhar caixa e rótulo
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (37, 245, 75), 2)
            cv2.putText(frame, f"{object_name}: {conf:.2f}", (bbox[0], bbox[1] - 5),
                        cv2.FONT_HERSHEY_PLAIN, 2, (37, 245, 75), 2)

    # Mostrar o frame processado
    cv2.imshow("Webcam Detection", frame)

    # Pressione 'q' para sair
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Liberar os recursos
VIDEO_SOURCE.release()
cv2.destroyAllWindows()

# Exibir resultados finais
def format_time(seconds):
    minutes = seconds / 60
    return f"{minutes:.2f} minutos"

print("\nTempo total por classe detectada:")
for obj, t in time_tracker.items():
    print(f"Tempo {obj}: {format_time(t)}")

### Utilização em Vídeo

Substitua o caminho em VIDEO_SOURCE pelo caminho do vídeo requerido

In [5]:

# Definir a fonte de vídeo
VIDEO_SOURCE = cv2.VideoCapture('Teste.mp4')

# Obter FPS e duração do vídeo
fps = VIDEO_SOURCE.get(cv2.CAP_PROP_FPS)
total_frames = int(VIDEO_SOURCE.get(cv2.CAP_PROP_FRAME_COUNT))
video_duration = total_frames / fps  # Duração total em segundos
frame_time = 1 / fps  # Tempo por frame

time_tracker = defaultdict(lambda: 0.0)
total_detected_time = 0.0  # Tempo total de detecção

batch_size = 5
frames_batch = []

while True:
    ret, frame = VIDEO_SOURCE.read()
    if not ret:
        break

    frames_batch.append(frame)

    if len(frames_batch) >= batch_size:
        results = model(frames_batch)

        for i, result in enumerate(results):
            frame = frames_batch[i].copy()
            bboxes = np.array(result.boxes.xyxy.cpu(), dtype="int")
            classes = np.array(result.boxes.cls.cpu(), dtype="int")
            confidence = np.array(result.boxes.conf.cpu(), dtype="float")

            detected_classes = set()

            for cls, bbox, conf in zip(classes, bboxes, confidence):
                if conf < 0.5:
                    continue

                object_name = model.names[cls]
                detected_classes.add(object_name)
                time_tracker[object_name] += frame_time

                cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (37, 245, 75), 2)
                cv2.putText(frame, f"{object_name}: {conf:.2f}", (bbox[0], bbox[1] - 5),
                            cv2.FONT_HERSHEY_PLAIN, 2, (37, 245, 75), 2)
    
            cv2.imshow("Video Detection", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        frames_batch = []

VIDEO_SOURCE.release()
cv2.destroyAllWindows()

def format_time(seconds, total):
    minutes = seconds / 60
    percent = (seconds / total) * 100
    return f"{minutes:.2f} minutes ({percent:.0f}%)"

print("\nTotal time per detected class:")
for obj, t in time_tracker.items():
    print(f"Time {obj}: {format_time(t, video_duration)}")

time_no_detection = video_duration - sum(time_tracker.values())
print(f"Time with no user detected: {format_time(time_no_detection, video_duration)}")
print(f"Total duration: {format_time(video_duration, video_duration)}")


0: 288x640 1 Seated, 59.5ms
1: 288x640 1 Seated, 59.5ms
2: 288x640 1 Seated, 59.5ms
3: 288x640 (no detections), 59.5ms
4: 288x640 (no detections), 59.5ms
Speed: 2.9ms preprocess, 59.5ms inference, 0.5ms postprocess per image at shape (1, 3, 288, 640)



  bboxes = np.array(result.boxes.xyxy.cpu(), dtype="int")
  classes = np.array(result.boxes.cls.cpu(), dtype="int")
  confidence = np.array(result.boxes.conf.cpu(), dtype="float")


0: 288x640 (no detections), 48.7ms
1: 288x640 (no detections), 48.7ms
2: 288x640 (no detections), 48.7ms
3: 288x640 (no detections), 48.7ms
4: 288x640 (no detections), 48.7ms
Speed: 2.5ms preprocess, 48.7ms inference, 0.3ms postprocess per image at shape (1, 3, 288, 640)

0: 288x640 (no detections), 44.4ms
1: 288x640 (no detections), 44.4ms
2: 288x640 (no detections), 44.4ms
3: 288x640 1 lying_down, 44.4ms
4: 288x640 1 lying_down, 44.4ms
Speed: 2.3ms preprocess, 44.4ms inference, 0.7ms postprocess per image at shape (1, 3, 288, 640)

0: 288x640 (no detections), 48.6ms
1: 288x640 (no detections), 48.6ms
2: 288x640 1 lying_down, 48.6ms
3: 288x640 1 lying_down, 48.6ms
4: 288x640 1 lying_down, 48.6ms
Speed: 1.9ms preprocess, 48.6ms inference, 0.7ms postprocess per image at shape (1, 3, 288, 640)

0: 288x640 1 lying_down, 45.9ms
1: 288x640 1 lying_down, 45.9ms
2: 288x640 1 lying_down, 45.9ms
3: 288x640 1 lying_down, 45.9ms
4: 288x640 1 lying_down, 45.9ms
Speed: 2.4ms preprocess, 45.9ms infe