In [None]:
import os
from pathlib import Path
from datasets import load_dataset
from PIL import Image
from tqdm import tqdm
import yaml


# INITIAL CONFIGURATION
# Dataset identifier passed to `load_dataset` in `prepare_dataset`.
DATASET_HUGGINGFACE_ID = "CUHK-CSE/wider_face"
# Root output directory; `images/<split>` and `labels/<split>` are created beneath it.
YOLO_DATASET_PATH = Path("./wider_face_yolo")
# Name of the dataset YAML written by `create_yolo_config_file` and passed as
# `data=` in the printed training command.
YAML_CONFIG_FILE = "wider_face_config.yaml"


# FUNÇÃO DE CONVERSÃO DE FORMATO
def convert_bbox_to_yolo(img_size, bbox):
    """Convert a pixel-space bounding box into a YOLO label line.

    The box is clipped to the image rectangle before it is normalised, so
    every emitted value lies in ``[0, 1]`` even when the annotation extends
    past the image border or starts at a negative coordinate.

    Args:
        img_size: ``(width, height)`` of the image, in pixels.
        bbox: ``(x_min, y_min, width, height)`` of the box, in pixels.

    Returns:
        A string ``"0 <x_center> <y_center> <width> <height>"`` where the
        four values are fractions of the image size and ``0`` is the class
        index (the only class used by this file).

    Raises:
        ZeroDivisionError: If either image dimension is zero.
    """
    img_width, img_height = img_size
    x_min, y_min, w, h = bbox

    # Clip both edges on each axis to the image rectangle; out-of-range
    # normalised coordinates are not valid YOLO labels.
    left = min(max(x_min, 0), img_width)
    right = min(max(x_min + w, 0), img_width)
    top = min(max(y_min, 0), img_height)
    bottom = min(max(y_min + h, 0), img_height)

    x_center_norm = (left + right) / 2 / img_width
    y_center_norm = (top + bottom) / 2 / img_height
    width_norm = (right - left) / img_width
    height_norm = (bottom - top) / img_height

    return f"0 {x_center_norm} {y_center_norm} {width_norm} {height_norm}"


# PROCESSAMENTO DO DATASET
def prepare_dataset():
    """Download the dataset and export its train/validation splits in YOLO layout.

    For each split, every sample's image is saved as
    ``<YOLO_DATASET_PATH>/images/<split>/<split>_<i>.jpg`` and, when the sample
    has at least one usable face box, a matching label file is written to
    ``<YOLO_DATASET_PATH>/labels/<split>/<split>_<i>.txt`` with one
    YOLO-formatted line per box. Samples without usable boxes get no label
    file.

    Boxes whose width or height is not positive are skipped, since they
    describe no area and are useless as detection targets.
    """
    print(f"Iniciando download do dataset: {DATASET_HUGGINGFACE_ID}")
    # NOTE(review): trust_remote_code=True lets the dataset repository run its
    # own loading script on this machine; confirm the source is trusted.
    dataset = load_dataset(DATASET_HUGGINGFACE_ID, trust_remote_code=True)
    print("Download concluído.")

    splits_to_process = {'train': dataset['train'], 'validation': dataset['validation']}

    for split_name, split_data in splits_to_process.items():
        print(f"\nProcessando o conjunto '{split_name}'")

        images_dir = YOLO_DATASET_PATH / "images" / split_name
        labels_dir = YOLO_DATASET_PATH / "labels" / split_name

        images_dir.mkdir(parents=True, exist_ok=True)
        labels_dir.mkdir(parents=True, exist_ok=True)

        for i, item in enumerate(tqdm(split_data, desc=f"Salvando {split_name}")):
            # Assumes item['image'] is a PIL image (it is used via .mode,
            # .size, .convert and .save below).
            image = item['image']
            bboxes = item['faces']['bbox'] or []

            image_filename = f"{split_name}_{i}.jpg"
            label_filename = f"{split_name}_{i}.txt"

            # JPEG cannot store alpha/palette modes; convert those so that
            # save() does not raise. The pixel dimensions are unchanged.
            if image.mode not in ("RGB", "L"):
                image = image.convert("RGB")
            image.save(images_dir / image_filename)

            yolo_annotations = []
            for bbox in bboxes:
                _, _, box_w, box_h = bbox
                if box_w <= 0 or box_h <= 0:
                    # Degenerate annotation: nothing to detect.
                    continue
                yolo_annotations.append(convert_bbox_to_yolo(image.size, bbox))

            if yolo_annotations:
                with open(labels_dir / label_filename, 'w', encoding='utf-8') as f:
                    f.write("\n".join(yolo_annotations))


# CRIAÇÃO DO ARQUIVO DE CONFIGURAÇÃO YAML
def create_yolo_config_file():
    """Write the dataset description YAML to ``YAML_CONFIG_FILE``.

    The file records the absolute dataset root, the image directories
    (relative to that root) for the train/val/test entries, and the single
    class name. Both ``val`` and ``test`` point at the validation images.
    Key order in the output follows the order in which the entries are
    defined here.
    """
    print(f"\nCriando arquivo de configuração: {YAML_CONFIG_FILE}")

    dataset_root = str(YOLO_DATASET_PATH.absolute())
    class_names = {0: 'face'}
    settings = dict(
        path=dataset_root,
        train='images/train',
        val='images/validation',
        test='images/validation',
        names=class_names,
    )

    with open(YAML_CONFIG_FILE, 'w') as out:
        # sort_keys=False keeps the insertion order of `settings`.
        yaml.dump(settings, out, default_flow_style=False, sort_keys=False)

    print("Arquivo de configuração criado com sucesso.")

In [None]:
# Run the export, write the dataset YAML, then print a ready-to-paste
# training command that points at that YAML.
prepare_dataset()
create_yolo_config_file()

print("\n" + "="*50)
print("      PREPARAÇÃO CONCLUÍDA COM SUCESSO!")
print("="*50)

# Only the `data=` part is interpolated; the other parts are plain strings.
train_command = (
    "yolo train "
    "model=yolov8s.pt "
    f"data={YAML_CONFIG_FILE} "
    "epochs=50 "
    "imgsz=640 "
    "batch=16 "
    "device=0"
)

print("TREINAR YOLO:")
# The original hint ended with a stray, unbalanced ")".
print("PS: `pip install ultralytics`.\n")
print(f"{train_command}\n\n")