In [40]:
from PIL import Image
import pandas as pd
import os
import glob
import shutil
import json
import re
from sklearn.model_selection import train_test_split

In [34]:
#cargando json con el encoding de labels a un diccionario
ruta_json='../dataset/labels.json'
with open(ruta_json, 'r') as f:
      labels_dict = json.load(f)

In [58]:
ruta_json='../raw/Medellin/anotaciones/'
rows = []

for archivo in os.listdir(ruta_json):
      if archivo.endswith('.json'):
            ruta_archivo = os.path.join(ruta_json, archivo)
            with open(ruta_archivo, 'r', encoding='utf-8') as f:
                  data = json.load(f)
                  image_key = data.get("key")
                  img_width = data.get("width")
                  img_height = data.get("height")

                  match = re.search(r'_(\d+)\.', image_key)
                  frame = int(match.group(1)) if match else None


                  for box in data.get("boxes", []):
                        row = {
                              "file": image_key,
                              "frame": frame,
                              "image_width": img_width,
                              "image_height": img_height,
                              "id": box.get("id"),
                              "label": box.get("label"),
                              "x": float(box.get("x")),
                              "y": float(box.get("y")),
                              "w": float(box.get("width")),
                              "h": float(box.get("height")),
                              "confidence": box.get("confidence")
                        }
                        rows.append(row)

# Convert to DataFrame
df = pd.DataFrame(rows)

In [59]:
df.head()

Unnamed: 0,file,frame,image_width,image_height,id,label,x,y,w,h,confidence
0,medellin_1.jpg,1,1280,720,1,DHelmet,460.0,612.0,78.0,158.0,
1,medellin_10.jpg,10,1280,720,1,DHelmet,896.88,402.06,55.76,117.46,
2,medellin_10.jpg,10,1280,720,2,DHelmet,1159.18,227.6,60.37,78.54,
3,medellin_52.jpg,52,1280,720,1,DHelmet,467.81,122.99,21.66,42.76,
4,medellin_52.jpg,52,1280,720,2,DHelmet,492.52,70.52,22.21,38.87,


In [60]:
df_imagenes_unicas = df[['file', 'frame']].drop_duplicates().reset_index(drop=True)
print(f"Imagenes unicas: {len(df_imagenes_unicas)}")

Imagenes unicas: 92


In [61]:
df_imagenes_unicas.head()

Unnamed: 0,file,frame
0,medellin_1.jpg,1
1,medellin_10.jpg,10
2,medellin_52.jpg,52
3,medellin_11.jpg,11
4,medellin_12.jpg,12


In [62]:
ruta_origen='../raw/Medellin'
ruta_destino_imagenes='../dataset/images/test'
ruta_destino_anotaciones='../dataset/labels/test'
image_width=1280
image_height=720
target_size=(640, 640)


for index, row in df_imagenes_unicas.iterrows():
    #copiando imagen
    file = row['file']
    frame = row['frame']
    frame_jpg = row['file'].split('.')[0]
    ruta_imagen_origen = os.path.join(ruta_origen,'imagenes', f'{frame_jpg}.jpg')
    ruta_imagen_destino = os.path.join(ruta_destino_imagenes, f'{frame_jpg}.jpg')
    #shutil.copy(ruta_imagen_origen, ruta_imagen_destino)

    try:
            img = Image.open(ruta_imagen_origen)
            img_resized = img.resize(target_size)
            img_resized.save(ruta_imagen_destino)
    except FileNotFoundError:
        print(f"Warning: Image not found at {ruta_imagen_origen}. Skipping.")
        continue # Saltar a la siguiente imagen si no se encuentra

    #generando anotaciones
    txt_filename = f"{frame_jpg}.txt"
    txt_filepath = os.path.join(ruta_destino_anotaciones, txt_filename)
    for index, row in df[(df.file==file) & (df.frame==frame)].iterrows():
        class_id=labels_dict[row['label']]
        x_center = int(row['x']+row['w']/2)
        y_center = int(row['y']+row['h']/2)

        x_center_norm = x_center / image_width
        y_center_norm = y_center / image_height
        width_norm = row['w'] / image_width
        height_norm = row['h'] / image_height

        yolov8_annotation=f"{class_id} {x_center_norm:.6f} {y_center_norm:.6f} {width_norm:.6f} {height_norm:.6f}\n"

        with open(txt_filepath, 'a') as f:
            f.write(yolov8_annotation)
