In [2]:
import json
import os
import shutil

import yaml
from PIL import Image
from tqdm import tqdm

# ---------- user params (edit these paths if your layout differs) ----------
train_root = "Jersey number data and labels/jersey-2023/train/images"   # train folder holding one sub-folder per key of train_json (0, 1, 2, ...)
train_json = "Jersey number data and labels/jersey-2023/train/train_gt.json"  # maps folder name -> jersey number

val_root = "Jersey number data and labels/jersey-2023/test/images"      # val/test folder, same layout as train_root
val_json = "Jersey number data and labels/jersey-2023/test/test_gt.json"  # maps folder name -> jersey number

output_dir = "yolo_dataset"                # the converted dataset is written here
include_unknown = False                    # set True to keep label -1 as its own class; disabled for now
# -----------------------------------------------------------------------

# Make sure the output root exists before anything is written into it.
os.makedirs(output_dir, exist_ok=True)

# --- Load mappings ---
def load_json_safe(p):
    """Load a JSON file, falling back to an empty dict when the path does not exist.

    Args:
        p: Path of the JSON file to read.

    Returns:
        The decoded JSON content, or ``{}`` when ``p`` does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    if not os.path.exists(p):
        return {}
    # Decode explicitly as UTF-8 instead of the platform default, which is a
    # locale code page on Windows and would garble non-ASCII content.
    with open(p, "r", encoding="utf-8") as f:
        return json.load(f)

train_map = load_json_safe(train_json)
val_map = load_json_safe(val_json)

# Collect every distinct jersey number across both splits
# (-1 is only kept when include_unknown is enabled).
all_nums = {
    number
    for mapping in (train_map, val_map)
    for number in mapping.values()
    if include_unknown or number != -1
}

# Class ids are the positions of the numbers in ascending order.
jersey_numbers = sorted(all_nums)
num_to_classid = dict(zip(jersey_numbers, range(len(jersey_numbers))))

print("Found jersey numbers (num -> class_id):")
print(num_to_classid)

# --- prepare output folders ---
# Each split gets an images/ and a labels/ directory under output_dir.
for split in ("train", "val"):
    for subdir in ("images", "labels"):
        os.makedirs(os.path.join(output_dir, split, subdir), exist_ok=True)

# --- helper to convert a split ---
def convert_split(split_name, root_dir, json_map):
    """Copy one split's images into the YOLO layout and write a label file per image.

    Every image found in ``root_dir/<folder>`` is copied to
    ``<output_dir>/<split_name>/images/<folder>_<file>`` and gets a matching
    ``.txt`` file in ``<output_dir>/<split_name>/labels`` containing a single
    full-image box tagged with the class id of the folder's jersey number.

    Reads the module-level ``output_dir``, ``include_unknown`` and
    ``num_to_classid``.

    Args:
        split_name: Name of the output sub-directory for this split.
        root_dir: Directory containing one sub-folder of images per key of
            ``json_map``.
        json_map: Mapping of folder name -> jersey number; -1 entries are
            skipped unless ``include_unknown`` is set.

    Returns:
        None. Progress and a summary line are printed.
    """
    print(f"\nConverting split: {split_name}  -- root: {root_dir}")
    missing_folders = 0
    converted = 0

    images_dir = os.path.join(output_dir, split_name, "images")
    labels_dir = os.path.join(output_dir, split_name, "labels")
    # Create the destinations here so the function works even when the
    # folder-preparation statements have not been run beforehand.
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    # No per-image boxes are available, so the whole image is used as the box
    # (x_center=0.5 y_center=0.5 w=1 h=1). Replace these values here if precise
    # bounding boxes become available later.
    x_center, y_center, w_norm, h_norm = 0.5, 0.5, 1.0, 1.0

    for folder, jersey_num in tqdm(json_map.items(), desc=f"Folders in {split_name}"):
        # Skip -1 unless it was requested as a class.
        if jersey_num == -1 and not include_unknown:
            continue

        if jersey_num not in num_to_classid:
            # A number that is absent from the mapping (rare): report and skip it.
            print(f"Warning: jersey number {jersey_num} not in mapping; skipping.")
            continue

        class_id = num_to_classid[jersey_num]
        folder_path = os.path.join(root_dir, folder)
        # isdir rather than exists: a stray regular file with the folder's name
        # would otherwise make the directory listing raise.
        if not os.path.isdir(folder_path):
            missing_folders += 1
            continue

        # The label content is identical for every image of the folder.
        label_line = f"{class_id} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}\n"

        with os.scandir(folder_path) as entries:
            for entry in entries:
                img_file = entry.name
                if not img_file.lower().endswith((".jpg", ".jpeg", ".png")):
                    continue
                # A sub-directory named like an image cannot be copied as a file.
                if not entry.is_file():
                    continue

                # Prefix with the folder name so files from different folders
                # do not overwrite each other: <folder>_<file>.
                new_img_name = f"{folder}_{img_file}"
                shutil.copy2(entry.path, os.path.join(images_dir, new_img_name))

                label_name = os.path.splitext(new_img_name)[0] + ".txt"
                with open(os.path.join(labels_dir, label_name), "w") as f:
                    f.write(label_line)

                converted += 1

    print(f"Converted {converted} images for {split_name}. Missing folders: {missing_folders}")

# Run the conversion for the training split, then for the validation split.
convert_split(split_name="train", root_dir=train_root, json_map=train_map)
convert_split(split_name="val", root_dir=val_root, json_map=val_map)

# --- write class_map.json and jersey.yaml ---
class_map_path = os.path.join(output_dir, "class_map.json")
with open(class_map_path, "w", encoding="utf-8") as f:
    json.dump(num_to_classid, f, indent=2, ensure_ascii=False)

# Class names as strings; jersey_numbers is sorted, so index == class_id.
names_ordered = [str(n) for n in jersey_numbers]

# In the Ultralytics dataset YAML format, `train`/`val` are resolved relative
# to `path` (the dataset root). jersey.yaml is stored inside output_dir, so
# prefixing the entries with output_dir again would point at
# <output_dir>/<output_dir>/... ; give the root explicitly and keep the split
# entries root-relative, with forward slashes so they work on every platform.
# Note: `path` is absolute, so regenerate the YAML if the dataset is moved.
yaml_content = {
    "path": os.path.abspath(output_dir),
    "train": "train/images",
    "val": "val/images",
    "nc": len(names_ordered),
    "names": names_ordered,
}

# save jersey.yaml
yaml_path = os.path.join(output_dir, "jersey.yaml")
with open(yaml_path, "w", encoding="utf-8") as f:
    yaml.dump(yaml_content, f, allow_unicode=True)

print("\n✅ Done.")
print(f"- YOLO dataset root: {output_dir}")
print(f"- class map saved to: {class_map_path}")
print(f"- yaml file saved to: {yaml_path}")
print(f"- number of classes (nc): {yaml_content['nc']}")


Found jersey numbers (num -> class_id):
{1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 32: 31, 33: 32, 34: 33, 35: 34, 36: 35, 38: 36, 40: 37, 43: 38, 44: 39, 45: 40, 50: 41, 53: 42, 55: 43, 56: 44, 59: 45, 60: 46, 62: 47, 69: 48, 75: 49, 76: 50, 78: 51, 93: 52, 99: 53}

Converting split: train  -- root: Jersey number data and labels/jersey-2023/train/images


Folders in train: 100%|██████████| 1427/1427 [57:59<00:00,  2.44s/it] 


Converted 560744 images for train. Missing folders: 0

Converting split: val  -- root: Jersey number data and labels/jersey-2023/test/images


Folders in val: 100%|██████████| 1211/1211 [36:16<00:00,  1.80s/it] 

Converted 434457 images for val. Missing folders: 0

✅ Done.
- YOLO dataset root: yolo_dataset
- class map saved to: yolo_dataset\class_map.json
- yaml file saved to: yolo_dataset\jersey.yaml
- number of classes (nc): 54





In [None]:
from ultralytics import YOLO

# Start from the `yolo11n.pt` weights and train on the dataset described by
# the generated jersey.yaml.
model = YOLO("yolo11n.pt")
model.train(
    data="yolo_dataset/jersey.yaml",
    epochs=50,
    imgsz=640,
    batch=16,
)

Ultralytics 8.3.199  Python-3.13.7 torch-2.8.0+cpu CPU (11th Gen Intel Core i7-1165G7 @ 2.80GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=yolo_dataset/jersey.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train6, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=