## Notebook: Recortar aleatoriamente áreas sem objetos de imagens de um dataset no formato YOLO

In [17]:
import os
import random
import shutil
from glob import glob
from tqdm import tqdm

from PIL import Image

In [18]:
# Dataset and output locations, resolved from the directory the notebook
# is executed in (two levels up, then into ``data``).
pwd = os.getcwd()
data_root = os.path.join(pwd, '..', '..', 'data')

# Root of the YOLO dataset.
dataset_dir = os.path.abspath(os.path.join(data_root, 'ships_v10i'))

# Folder that receives the cropped object-free areas; created if missing.
output_dir = os.path.abspath(
    os.path.join(data_root, 'categorized_images', 'background'))
os.makedirs(output_dir, exist_ok=True)

# How many object-free areas to extract.
num_samples = 3400

# Size of each cropped area as (width, height), in pixels.
crop_size = (224, 224)

# ### 2. Find images and labels that contain objects

In [19]:
def get_images_and_labels(images_dir, labels_dir):
    """Pair every image in ``images_dir`` with its non-empty YOLO label file.

    The label of an image is the file in ``labels_dir`` that has the same
    base name with the extension replaced by ``.txt``.

    Args:
        images_dir: Directory whose entries are treated as images.
        labels_dir: Directory holding the matching ``.txt`` label files.

    Returns:
        A list of ``(image_path, label_path)`` tuples, sorted by image path,
        containing only the images whose label file exists and is non-empty.
    """
    images_labels = []

    # glob returns entries in arbitrary order; sorting keeps the list stable
    # so that seeded random sampling over it is reproducible.
    for img_path in sorted(glob(os.path.join(images_dir, '*'))):
        # splitext swaps only the real extension: '.jpeg' / '.JPG' files are
        # matched, and names that merely contain '.jpg' or '.png' in the
        # middle are not mangled.
        stem, _ = os.path.splitext(os.path.basename(img_path))
        label_path = os.path.join(labels_dir, stem + '.txt')

        # Keep only images whose label file exists and is not empty.
        if os.path.isfile(label_path) and os.path.getsize(label_path) > 0:
            images_labels.append((img_path, label_path))

    return images_labels

In [20]:
# Dataset splits to scan; each one has its own images/ and labels/ folders.
sets = ['train', 'valid', 'test']
images_with_labels = []

for split_name in sets:
    split_root = os.path.join(dataset_dir, split_name)
    # Accumulate the (image, label) pairs of this split into the global list.
    images_with_labels += get_images_and_labels(
        os.path.join(split_root, 'images'),
        os.path.join(split_root, 'labels'),
    )

print(f'Total de imagens com objetos encontradas: {len(images_with_labels)}')

Total de imagens com objetos encontradas: 2851


In [21]:
def area_has_objects(label_path, crop_box, img_size):
    """Tell whether a crop rectangle overlaps any annotated object.

    Args:
        label_path: Path to a YOLO label file. Each non-blank line is either
            ``class x_center y_center width height`` or, for polygon-style
            annotations, ``class x1 y1 x2 y2 ...``; all coordinates are
            normalized to the image size.
        crop_box: ``[left, top, right, bottom]`` of the crop, in pixels.
        img_size: ``(width, height)`` of the image, in pixels.

    Returns:
        True if the crop overlaps at least one annotation; False otherwise.
        Rectangles whose edges merely touch are not considered overlapping.

    Raises:
        ValueError: If a non-blank line is neither a 4-coordinate box nor a
            polygon with an even number (at least 6) of coordinates.
    """
    img_w, img_h = img_size
    crop_left, crop_top, crop_right, crop_bottom = crop_box

    with open(label_path, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank (e.g. trailing) lines carry no annotation.
                continue

            # fields[0] is the class id, which is irrelevant for overlap.
            coords = [float(value) for value in fields[1:]]

            if len(coords) == 4:
                x_center, y_center, width, height = coords
                x_center *= img_w
                y_center *= img_h
                width *= img_w
                height *= img_h
                x_min, x_max = x_center - width/2, x_center + width/2
                y_min, y_max = y_center - height/2, y_center + height/2
            elif len(coords) >= 6 and len(coords) % 2 == 0:
                # Polygon annotation: use the bounding box of its vertices.
                xs = [x * img_w for x in coords[0::2]]
                ys = [y * img_h for y in coords[1::2]]
                x_min, x_max = min(xs), max(xs)
                y_min, y_max = min(ys), max(ys)
            else:
                raise ValueError(
                    f'Unsupported label line in {label_path}: {line!r}')

            # Strict inequalities: sharing only an edge is not an overlap.
            if (crop_right > x_min and crop_left < x_max and
                    crop_bottom > y_min and crop_top < y_max):
                return True
    return False

In [22]:
# Fixed seed so the same sequence of images and crop positions is drawn on
# every run.
random.seed(42)
selected_count = 0

if not images_with_labels:
    # random.choice would fail with an opaque IndexError on an empty list.
    raise RuntimeError('Nenhuma imagem com objetos disponível para amostragem.')

# Modes Pillow can write to JPEG directly; anything else (e.g. RGBA or
# palette-based PNGs) has to be converted to RGB before saving.
jpeg_modes = ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr')

# Cap on random draws so the loop ends even when object-free crops are rare
# or impossible (images smaller than the crop, objects covering everything).
max_attempts = 1000 * num_samples
attempts = 0

# The context manager closes the progress bar even if an error is raised.
with tqdm(total=num_samples, desc="Selecionando áreas sem objetos") as pbar:
    while selected_count < num_samples:
        if attempts >= max_attempts:
            raise RuntimeError(
                f'Limite de {max_attempts} tentativas atingido com apenas '
                f'{selected_count} de {num_samples} áreas salvas.')
        attempts += 1

        img_path, label_path = random.choice(images_with_labels)

        # Open inside a context manager so the file handle is released on
        # every iteration, including the ones that are skipped.
        with Image.open(img_path) as img:
            img_width, img_height = img.size

            # Images smaller than the crop cannot provide a sample.
            if img_width < crop_size[0] or img_height < crop_size[1]:
                continue

            # Largest top-left corner that keeps the crop inside the image.
            x_max = img_width - crop_size[0]
            y_max = img_height - crop_size[1]

            x_start = random.randint(0, x_max)
            y_start = random.randint(0, y_max)

            crop_box = [x_start, y_start,
                        x_start + crop_size[0], y_start + crop_size[1]]

            # Discard crops that overlap any annotated object.
            if area_has_objects(label_path, crop_box, (img_width, img_height)):
                continue

            cropped_img = img.crop(crop_box)
            if cropped_img.mode not in jpeg_modes:
                cropped_img = cropped_img.convert('RGB')
            cropped_img.save(os.path.join(
                output_dir, f'no_object_{selected_count}.jpg'))

        selected_count += 1
        pbar.update(1)

print(f'Áreas sem objetos copiadas com sucesso para {output_dir}')

Selecionando áreas sem objetos: 100%|██████████| 3400/3400 [01:56<00:00, 29.29it/s]

Áreas sem objetos copiadas com sucesso para /mnt/c/Users/Ricardo/Documents/Development/tcc-1/data/categorized_images/background



