In [16]:
import cv2
import numpy as np
import urllib.request

class ObjectDetector:
    """Detect people in an image, video or stream and save an annotated video.

    The wrapped ``model`` must expose ``predict(frame, conf=...)``. Each image
    prediction it produces is read through ``class_names`` and
    ``prediction.labels`` / ``prediction.confidence`` /
    ``prediction.bboxes_xyxy``.
    """

    # Frame rate written to the output file when neither the caller nor the
    # source provides a usable one.
    DEFAULT_FPS = 20.0

    def __init__(self, model, confidence_threshold=0.2):
        """Store the model and the minimum confidence for kept detections.

        Args:
            model: Object exposing ``predict(frame, conf=...)``.
            confidence_threshold: Detections below this confidence are dropped.
        """
        self.model = model
        self.confidence_threshold = confidence_threshold

    def _open_input(self, input_path):
        """Open ``input_path`` (image, video file or stream URL) for reading.

        Args:
            input_path: File path or URL handed to ``cv2.VideoCapture``.

        Returns:
            The ``cv2.VideoCapture`` created for ``input_path``. It may be
            unopened; callers should check ``isOpened()``.
        """
        # cv2.VideoCapture takes a file name or stream URL, never a decoded
        # frame, so files and URLs are opened the same way. The former HTTP
        # branch decoded one JPEG by hand, passed the resulting array to
        # VideoCapture, and spun forever when the stream ended before a
        # complete JPEG had arrived.
        return cv2.VideoCapture(input_path)

    def _process_frame(self, frame):
        """Run the model on ``frame`` and draw every detected person on it.

        Args:
            frame: Image passed to ``model.predict``; boxes and labels are
                drawn on it in place.

        Returns:
            Tuple ``(frame, frame_predictions)``. ``frame_predictions`` is a
            list of dicts with keys ``"class_name"``, ``"confidence"`` and
            ``"bbox"``, one per kept person detection.
        """
        # NOTE(review): frames read through OpenCV are normally BGR; confirm
        # that the model's predict pipeline expects that channel order.
        predictions = self.model.predict(frame, conf=self.confidence_threshold)

        # A result that exposes ``.prediction`` directly is a single-image
        # prediction rather than a collection; wrap it so the loop below
        # handles both. Presumably some super-gradients releases return such
        # an object for a single frame -- TODO confirm.
        if hasattr(predictions, "prediction"):
            predictions = [predictions]

        # Keep only detections of the "person" class.
        frame_predictions = []
        for prediction in predictions:
            class_names = prediction.class_names
            detections = prediction.prediction

            for label, conf, bbox in zip(detections.labels,
                                         detections.confidence,
                                         detections.bboxes_xyxy):
                class_name = class_names[int(label)]
                if class_name == 'person' and conf >= self.confidence_threshold:
                    frame_predictions.append({
                        "class_name": class_name,
                        "confidence": conf,
                        "bbox": bbox
                    })

                    # Draw the bounding box and its label on the frame.
                    xmin, ymin, xmax, ymax = map(int, bbox)
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
                    # Clamp the text baseline so labels of boxes touching the
                    # top edge are not drawn outside the image.
                    label_y = max(ymin - 10, 12)
                    cv2.putText(frame, f'Person: {conf:.2f}', (xmin, label_y),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        return frame, frame_predictions

    def detect_objects(self, input_path, output_path, max_frames=None, fps=None):
        """Annotate the frames of ``input_path`` and write them to ``output_path``.

        Args:
            input_path: Image/video file path or stream URL.
            output_path: Destination of the annotated ``mp4v`` video.
            max_frames: Optional upper bound on the number of frames processed.
                When it is ``None`` and the source reports no frame count,
                processing continues until a read fails.
            fps: Frame rate of the output file. Defaults to the rate reported
                by the source, or ``DEFAULT_FPS`` when none is reported.

        Raises:
            RuntimeError: If the input or the output writer cannot be opened.
        """
        cap = self._open_input(input_path)
        out = None
        frame_count = 0
        try:
            if not cap.isOpened():
                raise RuntimeError(f"Could not open input: {input_path}")

            # Some sources report 0 or a negative frame count; treat that as
            # "unknown" instead of as a limit of zero frames.
            reported_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_limit = reported_frames if reported_frames > 0 else None
            if max_frames is not None:
                frame_limit = max_frames if frame_limit is None else min(max_frames, frame_limit)

            if fps is None:
                fps = cap.get(cv2.CAP_PROP_FPS)
            # Also rejects NaN, which fails the ``> 0`` comparison.
            if not (fps and fps > 0):
                fps = self.DEFAULT_FPS

            # Process each frame of the input.
            while cap.isOpened() and (frame_limit is None or frame_count < frame_limit):
                ret, frame = cap.read()
                if not ret:
                    break

                frame_processed, _ = self._process_frame(frame)

                if out is None:
                    # Size the writer from the first real frame: the capture
                    # properties can be 0 before any frame has been read.
                    height, width = frame_processed.shape[:2]
                    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
                    if not out.isOpened():
                        raise RuntimeError(f"Could not open output: {output_path}")

                out.write(frame_processed)
                frame_count += 1
        finally:
            # Release capture and writer even when processing fails midway.
            cap.release()
            if out is not None:
                out.release()

        if frame_count:
            print("Vídeo processado e salvo com sucesso em:", output_path)
        else:
            print("Nenhum quadro foi processado; nenhum vídeo foi salvo em:", output_path)


In [None]:
from super_gradients.common.object_names import Models
from super_gradients.training import models

# ObjectDetector is defined in the previous cell; once it lives in its own
# module, import it instead:
# from object_detector import ObjectDetector

# Input video and destination of the annotated copy.
input_video_path = "/kaggle/input/video/videoplayback (1).mp4"
output_video_path = "/kaggle/working/saida_com_previsoes.mp4"

# Optional cap on the number of frames: only the first 500 are processed.
max_frames = 500

# Load YOLO-NAS-L with the COCO pretrained weights and wrap it in the detector.
model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco")
detector = ObjectDetector(model)

# Process the video and save the output video with the predictions drawn on it.
detector.detect_objects(input_video_path, output_video_path, max_frames=max_frames)


[2024-05-11 14:15:03] INFO - checkpoint_utils.py - License Notification: YOLO-NAS pre-trained weights are subjected to the specific license terms and conditions detailed in 
https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.YOLONAS.md
By downloading the pre-trained weight files you agree to comply with these terms.
[2024-05-11 14:15:03] INFO - checkpoint_utils.py - Successfully loaded pretrained weights for architecture yolo_nas_l
[2024-05-11 14:15:03] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
[2024-05-11 14:15:04] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
[2024-05-11 14:15:05] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
[2024-05-11 14:15:06] INFO - pipelines.py - Fusing some of the mod