In [1]:
import cv2
import numpy as np
import pandas as pd
from ultralytics import YOLO
import os
import torch

In [2]:
# Module-level YOLO instance built from 'yolo11s.pt'; generate_bbox calls
# model.predict on this global, so this cell must run before it is used.
model = YOLO('yolo11s.pt')

In [3]:
# Auto-label the frames stored under a dataset folder.
def generate_bbox(directory: str, conf: float = 0.4, device=None):
    """Detect one box per frame and write YOLO-style labels plus preview images.

    The function reads frames from ``<directory>/Fall`` and
    ``<directory>/Non_Fall`` and, for each readable image, runs the
    module-level ``model`` restricted to class 0 with at most one detection
    (the run log reports that class as "person").

    For every frame with a detection it writes ``<stem>.txt`` into
    ``Fall_label`` / ``Non_Fall_label`` containing one line::

        <label> <x_center> <y_center> <width> <height>

    where ``label`` is 1 for ``Fall`` and 0 for ``Non_Fall`` and the four
    box values are divided by the image width/height. Every readable frame
    (with the box drawn on it when one was found) is also saved into
    ``Fall_View`` / ``Non_Fall_View``.

    Args:
        directory: Dataset root containing the ``Fall`` and ``Non_Fall``
            frame folders. Output folders are created inside it.
        conf: Minimum detection confidence passed to ``model.predict``.
        device: Inference device passed to ``model.predict``. ``None``
            (default) selects GPU 0 when CUDA is available, otherwise CPU.

    Raises:
        FileNotFoundError: If one of the two input frame folders is missing
            (raised by ``os.listdir``).

    Notes:
        Label files are rewritten on every run, so re-running the cell does
        not duplicate lines. A label left by an earlier run is not deleted
        when the frame yields no detection this time.
    """
    image_exts = ('.jpg', '.jpeg', '.png')

    # A hard-coded device=0 fails on CPU-only machines; resolve it once here.
    if device is None:
        device = 0 if torch.cuda.is_available() else "cpu"

    # (input frames, label output, preview output, class id written to labels)
    splits = [
        (
            os.path.join(directory, 'Fall'),
            os.path.join(directory, 'Fall_label'),
            os.path.join(directory, 'Fall_View'),
            1,
        ),
        (
            os.path.join(directory, 'Non_Fall'),
            os.path.join(directory, 'Non_Fall_label'),
            os.path.join(directory, 'Non_Fall_View'),
            0,
        ),
    ]

    # Create the output folders if they do not exist yet.
    for _, labels_dir, view_dir, _ in splits:
        os.makedirs(labels_dir, exist_ok=True)
        os.makedirs(view_dir, exist_ok=True)

    for frames_dir, labels_dir, view_dir, label in splits:
        # Sorted so the processing order is the same on every run.
        for img_file in sorted(os.listdir(frames_dir)):
            stem, ext = os.path.splitext(img_file)
            if ext.lower() not in image_exts:
                continue

            img_path = os.path.join(frames_dir, img_file)
            img = cv2.imread(img_path)
            if img is None:
                print(f"Error leyendo {img_path}")
                continue
            img_height, img_width = img.shape[:2]

            results = model.predict(
                img, classes=[0], max_det=1, conf=conf, device=device
            )

            label_lines = []
            for r in results:
                xywh = r.boxes.xywh
                if xywh is None or len(xywh) == 0:
                    continue

                # Centre-based box in pixels, converted to plain floats once.
                x, y, w, h = xywh[0].tolist()
                label_lines.append(
                    f"{str(label)} {x / img_width:.6f} {y / img_height:.6f} "
                    f"{w / img_width:.6f} {h / img_height:.6f}\n"
                )

                # Corner coordinates for the preview rectangle.
                x1 = int(x - w / 2)
                y1 = int(y - h / 2)
                x2 = int(x + w / 2)
                y2 = int(y + h / 2)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=2)

            if label_lines:
                # splitext handles every extension (including .jpeg); 'w'
                # keeps the file idempotent across re-runs of the cell.
                label_file = os.path.join(labels_dir, stem + '.txt')
                with open(label_file, 'w') as f:
                    f.writelines(label_lines)

            # The preview is saved even when nothing was detected.
            cv2.imwrite(os.path.join(view_dir, img_file), img)
    print("Extraccion de bounding boxes terminada")

In [4]:
# Dataset root handed to generate_bbox, which reads the "Fall" and "Non_Fall"
# folders inside it. The path is relative, so it resolves against the kernel's
# working directory. NOTE: the parentheses do not create a tuple; test_dic is
# a plain string.
test_dic = ('dataset/Multiple_Cameras')
generate_bbox(test_dic)


0: 448x640 1 person, 106.1ms
Speed: 4.9ms preprocess, 106.1ms inference, 155.4ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 person, 21.9ms
Speed: 2.8ms preprocess, 21.9ms inference, 4.0ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 (no detections), 20.3ms
Speed: 2.4ms preprocess, 20.3ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 person, 21.8ms
Speed: 2.2ms preprocess, 21.8ms inference, 3.8ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 person, 23.1ms
Speed: 2.8ms preprocess, 23.1ms inference, 3.0ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 person, 17.2ms
Speed: 2.1ms preprocess, 17.2ms inference, 2.8ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 1 person, 15.6ms
Speed: 2.1ms preprocess, 15.6ms inference, 2.7ms postprocess per image at shape (1, 3, 448, 640)

0: 448x640 (no detections), 16.2ms
Speed: 1.9ms preprocess, 16.2ms inference, 1.9ms postprocess per image 