In [None]:
import cv2
import numpy as np
import urllib
from super_gradients.common.object_names import Models
from super_gradients.training import models

class ObjectDetector:
    """Detect people in video frames and save an annotated copy of the video.

    The wrapped model must expose ``predict(frame, conf=...)`` and return
    results carrying ``class_names`` and a ``prediction`` object with
    ``labels``, ``confidence`` and ``bboxes_xyxy`` attributes.
    """

    # Frame rate used for the output file when the source reports no usable one.
    DEFAULT_FPS = 20.0

    def __init__(self, model):
        """Store the detection model used by ``process_frame``."""
        self.model = model

    def process_frame(self, frame, confidence_threshold):
        """Run detection on one frame and draw a box around every person.

        The frame is annotated in place.

        Args:
            frame: Image passed to the model and to the OpenCV drawing calls.
            confidence_threshold: Minimum confidence for a detection to be
                kept; also forwarded to the model as ``conf``.

        Returns:
            A ``(frame, detections)`` tuple. ``detections`` is a list of
            dicts with the keys ``"class_name"``, ``"confidence"`` and
            ``"bbox"``, one per person kept.
        """
        predictions = self.model.predict(frame, conf=confidence_threshold)
        # NOTE(review): depending on the super-gradients version, predicting
        # on one image appears to return either an iterable of per-image
        # results or a single result object; normalise to an iterable.
        if hasattr(predictions, "prediction"):
            predictions = [predictions]

        frame_predictions = []
        for prediction in predictions:
            class_names = prediction.class_names
            detections = prediction.prediction

            for label, conf, bbox in zip(detections.labels,
                                         detections.confidence,
                                         detections.bboxes_xyxy):
                class_name = class_names[int(label)]
                if class_name == 'person' and conf >= confidence_threshold:
                    frame_predictions.append({
                        "class_name": class_name,
                        "confidence": conf,
                        "bbox": bbox
                    })

                    xmin, ymin, xmax, ymax = map(int, bbox)
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
                    # Keep the label inside the image when the box touches
                    # the top edge (ymin - 10 would otherwise be off-frame).
                    label_y = max(ymin - 10, 12)
                    cv2.putText(frame, f'Person: {conf:.2f}', (xmin, label_y),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        return frame, frame_predictions

    def open_input(self, input_path):
        """Open ``input_path`` as an OpenCV capture.

        ``cv2.VideoCapture`` takes a file name, an image sequence, a stream
        URL or a device index, so every kind of input is handed to it
        directly. The previous HTTP branch decoded one JPEG by hand and
        passed the resulting array to ``cv2.VideoCapture``, which is not a
        valid source.

        Args:
            input_path: Path, URL or device index of the source.

        Returns:
            The ``cv2.VideoCapture`` object; callers should check
            ``isOpened()`` before reading from it.
        """
        return cv2.VideoCapture(input_path)

    def process_input(self, input_path):
        """Open the input and read its frame size.

        Returns:
            A ``(capture, frame_width, frame_height)`` tuple; the sizes are
            the integer values of the capture's width/height properties.
        """
        cap = self.open_input(input_path)
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        return cap, frame_width, frame_height

    def detect_objects(self, input_path, output_path, max_frames=None, confidence_threshold=0.2):
        """Annotate people in the input video and write the result as MP4.

        Args:
            input_path: Source handed to ``open_input``.
            output_path: Destination file for the annotated video.
            max_frames: Optional upper bound on the number of frames
                processed; ``None`` processes until the source ends.
            confidence_threshold: Minimum confidence for a detection.

        Raises:
            OSError: If the input cannot be opened.
        """
        cap, frame_width, frame_height = self.process_input(input_path)
        if not cap.isOpened():
            cap.release()
            raise OSError(f"Could not open input: {input_path}")

        # Write at the source frame rate so playback speed is preserved;
        # `not fps > 0` also rejects NaN, which some backends may report.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = self.DEFAULT_FPS

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        frame_count = 0
        try:
            while cap.isOpened() and (max_frames is None or frame_count < max_frames):
                ret, frame = cap.read()
                if not ret:
                    break

                frame_processed, _ = self.process_frame(frame, confidence_threshold)

                out.write(frame_processed)
                frame_count += 1
        finally:
            # Release both handles even if detection raises, so the output
            # file is finalised and the source is not left open.
            cap.release()
            out.release()
        print("Vídeo com previsões salvo com sucesso em:", output_path)

# Input video to analyse and destination of the annotated copy.
input_video_path = "/kaggle/input/video/videoplayback (1).mp4"
output_video_path = "/kaggle/working/saida_com_previsoes.mp4"

# Optional cap on the number of frames processed (only the first 500 here).
max_frames = 500

# Optional minimum confidence a detection needs in order to be kept.
confidence_threshold = 0.2

# Load YOLO-NAS-L with COCO pretrained weights and wrap it in the detector.
model = models.get(Models.YOLO_NAS_L, pretrained_weights="coco")
detector = ObjectDetector(model)

# Process the input video and save the output video with the predictions.
detector.detect_objects(
    input_path=input_video_path,
    output_path=output_video_path,
    max_frames=max_frames,
    confidence_threshold=confidence_threshold,
)


[2024-05-11 14:24:13] INFO - checkpoint_utils.py - License Notification: YOLO-NAS pre-trained weights are subjected to the specific license terms and conditions detailed in 
https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.YOLONAS.md
By downloading the pre-trained weight files you agree to comply with these terms.
[2024-05-11 14:24:14] INFO - checkpoint_utils.py - Successfully loaded pretrained weights for architecture yolo_nas_l
[2024-05-11 14:24:14] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
[2024-05-11 14:24:15] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
[2024-05-11 14:24:16] INFO - pipelines.py - Fusing some of the model's layers. If this takes too much memory, you can deactivate it by setting `fuse_model=False`
[2024-05-11 14:24:17] INFO - pipelines.py - Fusing some of the mod