In [26]:
import subprocess

subprocess.run(["mv", "../data/traffic light", "../data/traffic_light"])

mv: cannot stat '../data/traffic light': No such file or directory


CompletedProcess(args=['mv', '../data/traffic light', '../data/traffic_light'], returncode=1)

In [27]:
# Célula 1: Importações e Definições de Funções
import os
import shutil
from sklearn.model_selection import train_test_split
import random
from pathlib import Path

def create_directories(base_path):
    """Cria as pastas necessárias para treino e validação"""
    dirs = ['images/train', 'images/val', 'labels/train', 'labels/val']
    for dir_path in dirs:
        os.makedirs(os.path.join(base_path, dir_path), exist_ok=True)

def split_dataset(source_dir, output_dir, val_split=0.2, seed=42):
    """
    Separa o dataset em treino e validação
    
    Args:
        source_dir: Diretório com as imagens e labels originais
        output_dir: Diretório onde serão criadas as pastas train/val
        val_split: Proporção do dataset para validação (padrão: 0.2)
        seed: Seed para reproducibilidade
    """
    random.seed(seed)
    
    source_dir = Path(source_dir)
    output_dir = Path(output_dir)
    
    create_directories(output_dir)
    
    # Lista todos os arquivos de imagem
    image_files = [f for f in (source_dir / 'images').glob('*.jpg')]
    
    # Divide os arquivos em treino e validação
    train_files, val_files = train_test_split(image_files, 
                                            test_size=val_split,
                                            random_state=seed)
    
    def copy_files(files, split_type):
        for f in files:
            # Copia imagem
            dst_img = output_dir / 'images' / split_type / f.name
            shutil.copy2(f, dst_img)
            
            # Procura e copia label correspondente
            label_name = f.stem + '.xml'
            src_label = source_dir / 'pascal' / label_name
            if src_label.exists():
                dst_label = output_dir / 'labels' / split_type / label_name
                shutil.copy2(src_label, dst_label)
    
    # Copia arquivos para as respectivas pastas
    copy_files(train_files, 'train')
    copy_files(val_files, 'val')
    
    print(f'Dataset dividido com sucesso!')
    print(f'Treino: {len(train_files)} imagens')
    print(f'Validação: {len(val_files)} imagens')

In [29]:
# Célula 2: Execução da função
source_directory = "../data/traffic_light"  # diretório com as imagens originais
output_directory = "../data"               # diretório onde serão criadas as subpastas

split_dataset(
    source_dir=source_directory,
    output_dir=output_directory,
    val_split=0.2,
    seed=42
)

Dataset dividido com sucesso!
Treino: 160 imagens
Validação: 40 imagens


In [33]:
# Célula 3: Verificação
def verify_splits():
    base_dir = Path("../dataset")
    
    for split in ['train', 'val']:
        images = list((base_dir / 'images' / split).glob('*.jpg'))
        labels = list((base_dir / 'labels' / split).glob('*.txt'))
        
        print(f'Split {split}:')
        print(f'  Imagens: {len(images)}')
        print(f'  Labels: {len(labels)}')
        print(f'  Matched: {len(images) == len(labels)}')
        print()

verify_splits()

Split train:
  Imagens: 0
  Labels: 0
  Matched: True

Split val:
  Imagens: 0
  Labels: 0
  Matched: True



In [31]:
import os

train_image_names = os.listdir('../data/images/train')
train_image_names = [name.split('.')[0] for name in train_image_names]
train_image_names = set(train_image_names)

train_label_names = os.listdir('../data/labels/train')
train_label_names = [name.split('.')[0] for name in train_label_names]
train_label_names = set(train_label_names)

train_image_names == train_label_names

True

In [34]:
val_image_names = os.listdir('../data/images/val')
val_image_names = [name.split('.')[0] for name in val_image_names]
val_image_names = set(val_image_names)

val_label_names = os.listdir('../data/labels/val')
val_label_names = [name.split('.')[0] for name in val_label_names]
val_label_names = set(val_label_names)

val_image_names == val_label_names

True