In [6]:
import json
from pathlib import Path
import shutil
from tqdm import tqdm

# Base paths
json_path = Path("annotations.json")  # Update if needed
image_dir = Path("all_images")
label_dir = Path("new_labels")
output_dir = Path("output")

# Read the annotation JSON.
# The encoding is given explicitly so the result does not depend on the
# platform's locale default (which is not UTF-8 on every system).
with open(json_path, 'r', encoding="utf-8") as f:
    data = json.load(f)

# Helper that copies the images and labels belonging to one split
def copy_split(image_ids, split_name, target_root):
    """Copy the image and label files of one split below ``target_root``.

    Creates ``target_root/split_name/images`` and
    ``target_root/split_name/labels`` (if they do not exist yet) and copies
    into them the files that belong to the given image ids.  Image metadata
    is read from the module-level ``data['images']``; source files are
    looked up in the module-level ``image_dir`` and ``label_dir``.

    Args:
        image_ids: Iterable of image ids to copy.
        split_name: Name of the split sub-directory (e.g. ``"train"``).
        target_root: ``Path`` of the dataset directory that receives the
            split.

    Raises:
        KeyError: If an id in ``image_ids`` has no entry in
            ``data['images']``.
    """
    images_out = target_root / split_name / "images"
    labels_out = target_root / split_name / "labels"

    images_out.mkdir(parents=True, exist_ok=True)
    labels_out.mkdir(parents=True, exist_ok=True)

    # Index the metadata once instead of rescanning the whole image list for
    # every id.  ``setdefault`` keeps the first entry when an id is
    # duplicated, which matches a front-to-back linear search.
    images_by_id = {}
    for img in data['images']:
        images_by_id.setdefault(img['id'], img)

    for img_id in tqdm(image_ids, desc=f"{target_root.name}/{split_name}"):
        try:
            image_info = images_by_id[img_id]
        except KeyError:
            raise KeyError(
                f"image id {img_id!r} not found in data['images']"
            ) from None
        file_name = image_info['file_name']
        stem = Path(file_name).stem

        # Copy the image; a missing source file is skipped on purpose
        # (best-effort copy).
        img_src = image_dir / file_name
        img_dst = images_out / file_name
        if img_src.exists():
            shutil.copy2(img_src, img_dst)

        # Copy the label file that shares the image's stem, if there is one.
        label_src = label_dir / f"{stem}.txt"
        label_dst = labels_out / f"{stem}.txt"
        if label_src.exists():
            shutil.copy2(label_src, label_dst)

# Build the train/val/test splits twice: first for the complete dataset,
# then for the chessred2k subset.  Each entry pairs the output directory
# name with the extra keys to follow below data['splits'] to reach that
# dataset's split table.
for dataset_name, extra_keys in (
    ("complete", ()),
    ("chessred2k", ("chessred2k",)),
):
    for split in ('train', 'val', 'test'):
        split_table = data['splits']
        for key in extra_keys:
            split_table = split_table[key]
        ids = split_table[split]['image_ids']
        copy_split(ids, split, output_dir / dataset_name)

complete/train: 100%|██████████| 6479/6479 [00:14<00:00, 444.43it/s]
complete/val: 100%|██████████| 2192/2192 [00:04<00:00, 459.72it/s]
complete/test: 100%|██████████| 2129/2129 [00:04<00:00, 441.62it/s]
chessred2k/train: 100%|██████████| 1442/1442 [00:04<00:00, 360.20it/s]
chessred2k/val: 100%|██████████| 330/330 [00:00<00:00, 350.60it/s]
chessred2k/test: 100%|██████████| 306/306 [00:00<00:00, 352.66it/s]
