In [1]:
# Environment Check
!pip -q install "ultralytics>=8.3.70"

import torch, ultralytics, sys, numpy as np, random, os
SEED = 42
random.seed(SEED); np.random.seed(SEED)
if torch.cuda.is_available(): torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

print("torch:", torch.__version__, "| cuda:", torch.cuda.is_available(),
      "| ultralytics:", ultralytics.__version__, "| py:", sys.version.split()[0])
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory/1e9:.1f} GB")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━[0m [32m0.6/1.1 MB[0m [31m19.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[?25hCreating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
torch: 2.8.0+cu126 | cuda: True | ultralytics: 8.3.205 | py: 3.12.11
GPU: NVIDIA A100-SXM4-40GB
VRAM: 42.5 GB


In [2]:
# Data Collections and paths

from google.colab import drive
drive.mount('/content/drive')

import os, pathlib

DRIVE_ROOT = "/content/drive/MyDrive"
LOCAL_ROOT = "/content"

# Look for either a zip or a folder (zip preferred)
zip_candidates = ["ObjectDetection.zip"]
folder_candidates = ["ObjectDetection"]

found_zip = next((os.path.join(DRIVE_ROOT, z)
                  for z in zip_candidates
                  if os.path.exists(os.path.join(DRIVE_ROOT, z))), None)
found_folder = next((os.path.join(DRIVE_ROOT, f)
                     for f in folder_candidates
                     if os.path.isdir(os.path.join(DRIVE_ROOT, f))), None)

if found_zip:
    print(f"Unzipping from Drive -> local SSD: {found_zip}")
    !unzip -q "$found_zip" -d "/content"
    BASE = pathlib.Path("/content/ObjectDetection")
elif found_folder:
    print(f"Copying folder from Drive -> local SSD: {found_folder}")
    !rsync -a --info=progress2 "$found_folder" "/content/"
    BASE = pathlib.Path("/content/ObjectDetection")
else:
    raise FileNotFoundError(
        "Did not find 'ObjectDetection.zip' or 'ObjectDetection/' in My Drive.\n"
        "Upload one of them to /MyDrive and rerun this cell."
    )

DATA = BASE / "data"
META = BASE / "meta"
LOGS = BASE / "logs"
for p in (DATA, META, LOGS):
    p.mkdir(parents=True, exist_ok=True)

print("Local project root:", BASE)
print("Subfolders now:", [p.name for p in BASE.iterdir()])


Mounted at /content/drive
Unzipping from Drive -> local SSD: /content/drive/MyDrive/ObjectDetection.zip
Local project root: /content/ObjectDetection
Subfolders now: ['data', 'logs', 'meta']


In [3]:
# Data Collection Check
from pathlib import Path
import sys, glob

raw_dir = DATA / "raw"
need_meta = [META / "identity_CelebA.txt", META / "selected_ids.json"]

missing_meta = [str(p) for p in need_meta if not p.exists()]
raw_ok = raw_dir.exists() and any(raw_dir.iterdir())

if missing_meta:
    print(" Missing meta files:\n - " + "\n - ".join(missing_meta))
if not raw_ok:
    print(f" Missing or empty raw directory: {raw_dir}\n")

if missing_meta or not raw_ok:
    sys.exit("Please add the missing items to your Drive")

# quick count
jpgs = glob.glob(str(raw_dir / "images__*_" / "**" / "*.jpg"), recursive=True)
pngs = glob.glob(str(raw_dir / "images__*_" / "**" / "*.png"), recursive=True)
print(f"Raw images found: {len(jpgs) + len(pngs):,}")
print("Setup looks good. Ready for the next steps when you are.")


Raw images found: 1,508
Setup looks good. Ready for the next steps when you are.


In [4]:
# Build mappings + quick audit of raw counts
import json, re, glob
from pathlib import Path

# Reuse paths from earlier blocks
BASE = Path("/content/ObjectDetection")
DATA = BASE / "data"
META = BASE / "meta"

# selected_ids.json -> id_remap.json
sel_path = META / "selected_ids.json"
assert sel_path.exists(), f"Missing: {sel_path}"
raw_ids = json.loads(sel_path.read_text())
id_remap = {str(int(r)): i for i, r in enumerate(raw_ids)}
(META / "id_remap.json").write_text(json.dumps(id_remap, indent=2))
print("Wrote:", META / "id_remap.json", f"({len(raw_ids)} classes)")

# identity_CelebA.txt -> image_to_id.json
id_txt = META / "identity_CelebA.txt"
assert id_txt.exists(), f"Missing: {id_txt}"
pat = re.compile(r"^\s*(\d{6}\.jpg)\s+(\d+)\s*$")
image_to_id = {}
with id_txt.open("r", encoding="utf-8", errors="ignore") as f:
    for line in f:
        m = pat.match(line)
        if m:
            image_to_id[m.group(1)] = int(m.group(2))
(META / "image_to_id.json").write_text(json.dumps(image_to_id, indent=2))
print("Wrote:", META / "image_to_id.json", "->", len(image_to_id), "rows")

# Quick audit: raw counts per selected ID
raw_dir = DATA / "raw"
ex = {".jpg",".jpeg",".png"}
def count_images_for_id(cid:int):
    d = raw_dir / f"images__{cid}_"
    if not d.exists(): return 0
    return sum(1 for p in d.rglob("*") if p.suffix.lower() in ex)

counts = {cid: count_images_for_id(int(cid)) for cid in raw_ids}
zero = [cid for cid,n in counts.items() if n == 0]
print(f"Classes: {len(raw_ids)} | IDs with ≥1 raw image: {sum(n>0 for n in counts.values())}")
if zero:
    print("WARNING — IDs with 0 raw images (fix before next block):", zero)
else:
    # Small Summary
    total_imgs = sum(counts.values())
    smallest = sorted(counts.items(), key=lambda kv: kv[1])[:5]
    largest  = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:5]
    print(f"Total raw images across selected IDs: {total_imgs}")
    print("Lowest 5 counts:", smallest)
    print("Highest 5 counts:", largest)


Wrote: /content/ObjectDetection/meta/id_remap.json (44 classes)
Wrote: /content/ObjectDetection/meta/image_to_id.json -> 202599 rows
Classes: 44 | IDs with ≥1 raw image: 44
Total raw images across selected IDs: 1508
Lowest 5 counts: [(8265, 10), (3321, 18), (3401, 18), (6098, 20), (228, 21)]
Highest 5 counts: [(3699, 100), (3745, 100), (9152, 100), (9256, 100), (3782, 36)]


In [5]:
# Data Augmenting images for each celebrity with 100 images

from pathlib import Path
import json, random, csv, gc
from PIL import Image, ImageOps, ImageEnhance, ImageFilter

BASE = Path("/content/ObjectDetection")
DATA = BASE / "data"
META = BASE / "meta"

RAW_ROOT  = DATA / "raw"
SEL_JSON  = META / "selected_ids.json"
OUT_ROOT  = DATA / "single_100"
LOG_CSV   = META / "augment_log.csv"
TARGET = 100
SEED = 42
random.seed(SEED)

def aug_once(img):
    if random.random() < 0.5:
        img = ImageOps.mirror(img)
    if random.random() < 0.45:
        img = img.rotate(random.uniform(-5, 5), resample=Image.BICUBIC, expand=False)
    if random.random() < 0.6:
        img = ImageEnhance.Brightness(img).enhance(random.uniform(0.85, 1.15))
        img = ImageEnhance.Contrast(img).enhance(random.uniform(0.9, 1.1))
        img = ImageEnhance.Color(img).enhance(random.uniform(0.9, 1.1))
    if random.random() < 0.25:
        img = img.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.0, 1.0)))
    if random.random() < 0.25:
        w, h = img.size
        img = img.resize((max(1, int(w*0.9)), max(1, int(h*0.9))), Image.BICUBIC).resize((w, h), Image.BICUBIC)
    return img

def collect_sources_for_id(cid:int):
    d = RAW_ROOT / f"images__{cid}_"
    if not d.is_dir(): return []
    exts = {".jpg", ".jpeg", ".png"}
    return [p for p in d.rglob("*") if p.suffix.lower() in exts]

def main():
    sel = [int(x) for x in json.loads(SEL_JSON.read_text())]
    OUT_ROOT.mkdir(parents=True, exist_ok=True)
    LOG_CSV.parent.mkdir(parents=True, exist_ok=True)

    with LOG_CSV.open("w", newline="") as fcsv:
        w = csv.DictWriter(fcsv, fieldnames=[
            "id","out_file","src","type","mirror","rotate","brightness","contrast","saturation","blur","down_up"
        ])
        w.writeheader()

        for cid in sel:
            srcs = collect_sources_for_id(cid)
            out_dir = OUT_ROOT / str(cid)
            out_dir.mkdir(parents=True, exist_ok=True)
            for old in out_dir.glob("*.jpg"): old.unlink()

            if not srcs:
                print(f"[WARN] ID {cid}: no sources found in {RAW_ROOT}/images__{cid}_/")
                continue

            base = []
            for p in srcs:
                try:
                    with Image.open(p) as im:
                        im = im.convert("RGB")
                        dst = out_dir / f"orig_{p.stem}.jpg"
                        im.save(dst, quality=95)
                    w.writerow({"id":cid,"out_file":dst.name,"src":str(p),"type":"orig"})
                    base.append(dst)
                    if len(base) >= TARGET: break
                except Exception as e:
                    print("  skip:", p, e)

            i = 0
            while len(list(out_dir.glob("*.jpg"))) < TARGET and base:
                pick = random.choice(base)
                with Image.open(pick) as im:
                    im = im.convert("RGB")
                    log = {"id":cid, "out_file":f"aug_{i:06d}.jpg", "src":pick.name, "type":"aug"}

                    log["mirror"]=log["rotate"]=log["brightness"]=log["contrast"]=log["saturation"]=log["blur"]=log["down_up"]=None
                    before = im
                    im2 = im

                    import random as _r
                    if _r.random() < 0.5:
                        im2 = ImageOps.mirror(im2); log["mirror"] = True
                    if _r.random() < 0.45:
                        ang = _r.uniform(-5, 5); im2 = im2.rotate(ang, resample=Image.BICUBIC, expand=False); log["rotate"] = round(ang,2)
                    if _r.random() < 0.6:
                        b = _r.uniform(0.85, 1.15); im2 = ImageEnhance.Brightness(im2).enhance(b); log["brightness"] = round(b,3)
                        c = _r.uniform(0.9, 1.1);  im2 = ImageEnhance.Contrast(im2).enhance(c);   log["contrast"]   = round(c,3)
                        s = _r.uniform(0.9, 1.1);  im2 = ImageEnhance.Color(im2).enhance(s);      log["saturation"] = round(s,3)
                    if _r.random() < 0.25:
                        r = _r.uniform(0.0, 1.0); im2 = im2.filter(ImageFilter.GaussianBlur(radius=r)); log["blur"] = round(r,3)
                    if _r.random() < 0.25:
                        w0,h0 = im2.size
                        k = 0.9; im2 = im2.resize((max(1,int(w0*k)), max(1,int(h0*k))), Image.BICUBIC).resize((w0,h0), Image.BICUBIC); log["down_up"] = k

                    outp = out_dir / log["out_file"]
                    im2.save(outp, quality=90)
                    w.writerow(log)
                i += 1

            built = len(list(out_dir.glob("*.jpg")))
            print(f"ID {cid}: {built}/{TARGET}")
            gc.collect()

    print("Augmentation log ->", LOG_CSV)

main()


ID 228: 100/100
ID 447: 100/100
ID 487: 100/100
ID 800: 100/100
ID 1158: 100/100
ID 1499: 100/100
ID 1852: 100/100
ID 1964: 100/100
ID 2425: 100/100
ID 2463: 100/100
ID 2522: 100/100
ID 2562: 100/100
ID 2820: 100/100
ID 2837: 100/100
ID 2880: 100/100
ID 3227: 100/100
ID 3321: 100/100
ID 3401: 100/100
ID 3431: 100/100
ID 3698: 100/100
ID 3699: 100/100
ID 3745: 100/100
ID 3782: 100/100
ID 4126: 100/100
ID 4304: 100/100
ID 5239: 100/100
ID 6098: 100/100
ID 6568: 100/100
ID 7282: 100/100
ID 7904: 100/100
ID 8045: 100/100
ID 8265: 100/100
ID 8656: 100/100
ID 8722: 100/100
ID 8871: 100/100
ID 8945: 100/100
ID 8968: 100/100
ID 9063: 100/100
ID 9151: 100/100
ID 9152: 100/100
ID 9256: 100/100
ID 9319: 100/100
ID 10046: 100/100
ID 10173: 100/100
Augmentation log -> /content/ObjectDetection/meta/augment_log.csv


In [6]:
# Build mixed 2x2/3x3/4x4 composites (no overlap) + YOLO labels + CSV log
import random, json, csv, gc
from pathlib import Path
from PIL import Image

BASE = Path("/content/ObjectDetection")
DATA = BASE / "data"
META = BASE / "meta"

AUG_DIR = DATA / "single_100"
OUT_IMG = DATA / "synth_multi/images"
OUT_LAB = DATA / "synth_multi/labels"
LOG_CSV = META / "composites_log.csv"

CANVAS = (1024, 1024)

# Grid mix: (rows, cols, proportion, (outer_margin, gutter), fill_range)
GRID_MIX = [
    (2, 2, 0.40, (32, 24), (0.88, 0.96)),  # big faces
    (3, 3, 0.40, (24, 18), (0.70, 0.90)),  # medium faces
    (4, 4, 0.20, (16, 12), (0.55, 0.75)),  # small faces
]

TOT_SAMPLES = 10_000
SEED = 123
random.seed(SEED)

REMAP = json.loads((META / "id_remap.json").read_text())
REMAP = {int(k): int(v) for k, v in REMAP.items()}
SELECTED = set(REMAP.keys())

def cell_rects(W, H, rows, cols, outer, gutter):
    cw = (W - 2*outer - (cols-1)*gutter) // cols
    ch = (H - 2*outer - (rows-1)*gutter) // rows
    rects = []
    for r in range(rows):
        for c in range(cols):
            x = outer + c*(cw+gutter)
            y = outer + r*(ch+gutter)
            rects.append((x,y,cw,ch))
    return rects

def paste_in_cell(canvas, face, rect, fill_range):
    cx, cy, cw, ch = rect
    s = min(cw/face.width, ch/face.height) * random.uniform(*fill_range)
    tw = max(1, int(face.width * s))
    th = max(1, int(face.height * s))
    face_r = face.resize((tw, th), Image.BICUBIC)
    x = cx + (cw - tw)//2
    y = cy + (ch - th)//2
    canvas.paste(face_r, (x, y))

    W, H = canvas.size
    return (x, y, tw, th, ((x + tw/2)/W, (y + th/2)/H, tw/W, th/H))

def main():
    per_celeb = {}
    for d in AUG_DIR.iterdir():
        if not d.is_dir(): continue
        try:
            rid = int(d.name)
        except:
            continue
        if rid in SELECTED:
            imgs = list(d.glob("*.jpg"))
            if imgs: per_celeb[rid] = imgs

    celeb_ids = list(per_celeb.keys())
    assert celeb_ids, "No per-celeb images found in single_100/"

    OUT_IMG.mkdir(parents=True, exist_ok=True)
    OUT_LAB.mkdir(parents=True, exist_ok=True)
    for p in OUT_IMG.glob("*.jpg"): p.unlink()
    for p in OUT_LAB.glob("*.txt"): p.unlink()

    types, weights = [], []
    for (r,c,pct,_,_) in GRID_MIX:
        types.append((r,c))
        weights.append(pct)
    s = sum(weights); weights = [w/s for w in weights]

    W, H = CANVAS
    with LOG_CSV.open("w", newline="") as f:
        fields = ["image","rows","cols","slot","raw_id","compact_id","src_file",
                  "cell_x","cell_y","cell_w","cell_h","x1","y1","w","h","cx","cy","wnorm","hnorm"]
        writer = csv.DictWriter(f, fieldnames=fields); writer.writeheader()

        for i in range(TOT_SAMPLES):
            rows, cols = random.choices(types, weights=weights, k=1)[0]
            exact_k = rows * cols
            assert len(celeb_ids) >= exact_k, f"Need ≥{exact_k} classes for {rows}x{cols}"

            (outer, gutter), fill = next(((omg, fr) for (rr,cc,_,omg,fr) in GRID_MIX if rr==rows and cc==cols))
            rects = cell_rects(W, H, rows, cols, outer, gutter)

            chosen = random.sample(celeb_ids, exact_k)

            g = random.randint(230, 255)
            canvas = Image.new("RGB", (W, H), (g,g,g))
            labels = []

            for slot, (rect, rid) in enumerate(zip(rects, chosen)):
                face_path = random.choice(per_celeb[rid])
                with Image.open(face_path) as face:
                    face = face.convert("RGB")
                    x,y,tw,th,(cx,cy,wn,hn) = paste_in_cell(canvas, face, rect, fill)
                labels.append((REMAP[rid], cx, cy, wn, hn))
                writer.writerow({
                    "image": f"synth_{i:06d}.jpg", "rows": rows, "cols": cols, "slot": slot,
                    "raw_id": rid, "compact_id": REMAP[rid], "src_file": face_path.name,
                    "cell_x": rect[0], "cell_y": rect[1], "cell_w": rect[2], "cell_h": rect[3],
                    "x1": x, "y1": y, "w": tw, "h": th, "cx": cx, "cy": cy, "wnorm": wn, "hnorm": hn
                })

            # save
            out_img = OUT_IMG / f"synth_{i:06d}.jpg"
            out_lab = OUT_LAB / f"synth_{i:06d}.txt"
            canvas.save(out_img, quality=95)
            with out_lab.open("w") as lf:
                for cls,cx,cy,wn,hn in labels:
                    lf.write(f"{cls} {cx:.6f} {cy:.6f} {wn:.6f} {hn:.6f}\n")

            if i % 200 == 0:
                print(f"{i}/{TOT_SAMPLES} composites")
                gc.collect()

    print("Mixed-grid composites ready.")
    print("Images ->", OUT_IMG)
    print("Labels ->", OUT_LAB)
    print("Log     ->", LOG_CSV)

main()


0/10000 composites
200/10000 composites
400/10000 composites
600/10000 composites
800/10000 composites
1000/10000 composites
1200/10000 composites
1400/10000 composites
1600/10000 composites
1800/10000 composites
2000/10000 composites
2200/10000 composites
2400/10000 composites
2600/10000 composites
2800/10000 composites
3000/10000 composites
3200/10000 composites
3400/10000 composites
3600/10000 composites
3800/10000 composites
4000/10000 composites
4200/10000 composites
4400/10000 composites
4600/10000 composites
4800/10000 composites
5000/10000 composites
5200/10000 composites
5400/10000 composites
5600/10000 composites
5800/10000 composites
6000/10000 composites
6200/10000 composites
6400/10000 composites
6600/10000 composites
6800/10000 composites
7000/10000 composites
7200/10000 composites
7400/10000 composites
7600/10000 composites
7800/10000 composites
8000/10000 composites
8200/10000 composites
8400/10000 composites
8600/10000 composites
8800/10000 composites
9000/10000 compos

In [7]:
# Split into train/val/test and write data.yaml
import random, shutil, textwrap, json
from pathlib import Path

BASE = Path("/content/ObjectDetection")
DATA = BASE / "data"
META = BASE / "meta"

SRC_IMG = DATA/"synth_multi/images"
SRC_LAB = DATA/"synth_multi/labels"
DST     = DATA/"yolo_split"
SPLITS  = {"train":0.8, "val":0.1, "test":0.1}

pairs = sorted(SRC_IMG.glob("*.jpg"))
random.shuffle(pairs)
n = len(pairs); n_tr = int(n*SPLITS["train"]); n_val = int(n*SPLITS["val"])
sets = {"train": pairs[:n_tr], "val": pairs[n_tr:n_tr+n_val], "test": pairs[n_tr+n_val:]}

for split, imgs in sets.items():
    (DST/"images"/split).mkdir(parents=True, exist_ok=True)
    (DST/"labels"/split).mkdir(parents=True, exist_ok=True)
    for img in imgs:
        lab = SRC_LAB/(img.stem + ".txt")
        shutil.copy(img, DST/"images"/split/img.name)
        shutil.copy(lab, DST/"labels"/split/lab.name)
    print(split, len(imgs))

sel = json.loads((META/"selected_ids.json").read_text())
yaml_text = textwrap.dedent("""\
path: /content/ObjectDetection/data/yolo_split
train: images/train
val: images/val
test: images/test
names:
""") + "\n".join([f'  - "{rid}"' for rid in sel])
(DATA/"data.yaml").write_text(yaml_text)
print("Wrote:", DATA/"data.yaml")


train 8000
val 1000
test 1000
Wrote: /content/ObjectDetection/data/data.yaml


In [8]:
from pathlib import Path, PurePosixPath
import yaml

cfg = yaml.safe_load(Path("/content/ObjectDetection/data/data.yaml").read_text())
base = Path(cfg["path"])

for split in ["train","val","test"]:
    p = base / f"images/{split}"
    print(split, "->", p, "| exists:", p.exists(), "| jpg:", len(list(p.glob("*.jpg"))))


train -> /content/ObjectDetection/data/yolo_split/images/train | exists: True | jpg: 8000
val -> /content/ObjectDetection/data/yolo_split/images/val | exists: True | jpg: 1000
test -> /content/ObjectDetection/data/yolo_split/images/test | exists: True | jpg: 1000


In [9]:
import torch, psutil

# GPU (VRAM)
if torch.cuda.is_available():
    name = torch.cuda.get_device_name(0)
    props = torch.cuda.get_device_properties(0)
    free_bytes, total_bytes = torch.cuda.mem_get_info(0)
    print(f"GPU: {name}")
    print(f"VRAM total : {total_bytes/1e9:.2f} GB")
    print(f"VRAM free  : {free_bytes/1e9:.2f} GB")
    print(f"VRAM used  : {(total_bytes-free_bytes)/1e9:.2f} GB")
else:
    print("No CUDA GPU detected.")

# System RAM
vm = psutil.virtual_memory()
print(f"RAM total  : {vm.total/1e9:.2f} GB")
print(f"RAM avail  : {vm.available/1e9:.2f} GB")
print(f"RAM in use : {(vm.total-vm.available)/1e9:.2f} GB")


GPU: NVIDIA A100-SXM4-40GB
VRAM total : 42.47 GB
VRAM free  : 42.03 GB
VRAM used  : 0.44 GB
RAM total  : 89.63 GB
RAM avail  : 87.01 GB
RAM in use : 2.62 GB


In [10]:
# Train YOLOv8m for best accuracy
from ultralytics import YOLO

yaml = "/content/ObjectDetection/data/data.yaml"

model = YOLO("yolov8m.pt")

results = model.train(
    data=yaml,
    imgsz=896,
    epochs=100,
    batch=-1,
    workers=8,
    device=0,
    amp=True,
    cache="ram",
    mosaic=0.2,
    close_mosaic=10,
    lr0=0.001,
    patience=25,
    hsv_h=0.015, hsv_s=0.7, hsv_v=0.4,
)

print("Run dir :", model.trainer.save_dir)
print("Best pt :", model.trainer.save_dir / "weights" / "best.pt")


[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m.pt to 'yolov8m.pt': 100% ━━━━━━━━━━━━ 49.7MB 170.4MB/s 0.3s
Ultralytics 8.3.205 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (NVIDIA A100-SXM4-40GB, 40507MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=ram, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/ObjectDetection/data/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=896, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8m.pt, momentum=0

In [11]:


from ultralytics import YOLO

best = "/content/runs/detect/train/weights/best.pt"
yaml = "/content/ObjectDetection/data/data.yaml"

print("Validating best.pt on TEST split…")
YOLO(best).val(data=yaml, split="test", imgsz=896, device=0)


Validating best.pt on TEST split…
Ultralytics 8.3.205 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (NVIDIA A100-SXM4-40GB, 40507MiB)
Model summary (fused): 92 layers, 25,865,236 parameters, 0 gradients, 78.8 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2457.2±838.2 MB/s, size: 167.7 KB)
[K[34m[1mval: [0mScanning /content/ObjectDetection/data/yolo_split/labels/test... 1000 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 1000/1000 1.5Kit/s 0.7s
[34m[1mval: [0mNew cache created: /content/ObjectDetection/data/yolo_split/labels/test.cache
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 63/63 3.9it/s 16.0s
                   all       1000       8458          1          1      0.995      0.995
                   228        187        187          1          1      0.995      0.995
                   447        196        196          1          1      0.995      0.995
                   487       

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7ba60ac33650>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    

In [14]:
# Make /content/ObjectDetection/eval/group44.jpg
from pathlib import Path
from PIL import Image
import json, random, math

BASE = Path("/content/ObjectDetection")
AUG_DIR = BASE/"data/single_100"
META    = BASE/"meta"
OUTDIR  = BASE/"eval"; OUTDIR.mkdir(parents=True, exist_ok=True)

SEL = [int(x) for x in json.loads((META/"selected_ids.json").read_text())]

N = len(SEL)
rows = int(math.floor(math.sqrt(N)))          # 6
cols = int(math.ceil(N / rows))               # 8
print(f"Grid: {rows} x {cols} (cells={rows*cols}, faces={N})")

W, H = 2048, 1536
outer, gutter = 24, 16
fill_range = (0.85, 0.95)

def cell_rects(W, H, rows, cols, outer, gutter):
    cw = (W - 2*outer - (cols-1)*gutter) // cols
    ch = (H - 2*outer - (rows-1)*gutter) // rows
    rects = []
    for r in range(rows):
        for c in range(cols):
            x = outer + c*(cw+gutter)
            y = outer + r*(ch+gutter)
            rects.append((x,y,cw,ch))
    return rects

def paste_center_fit(canvas, img, rect, fill_range):
    cx, cy, cw, ch = rect
    s = min(cw/img.width, ch/img.height) * random.uniform(*fill_range)
    tw = max(1, int(img.width * s))
    th = max(1, int(img.height * s))
    imr = img.resize((tw, th), Image.BICUBIC)
    x = cx + (cw - tw)//2
    y = cy + (ch - th)//2
    canvas.paste(imr, (x, y))

rects = cell_rects(W, H, rows, cols, outer, gutter)
canvas = Image.new("RGB", (W, H), (236,236,236))

rng = random.Random(123)
cells_to_fill = rects[:N]

for rid, rect in zip(SEL, cells_to_fill):
    imgs = list((AUG_DIR/str(rid)).glob("*.jpg"))
    assert imgs, f"No images found for ID {rid} in {AUG_DIR/rid}"
    pick = rng.choice(imgs)
    with Image.open(pick) as im:
        paste_center_fit(canvas, im.convert("RGB"), rect, fill_range)

out_path = OUTDIR/"group44.jpg"
canvas.save(out_path, quality=95)
print("Saved:", out_path)


Grid: 6 x 8 (cells=48, faces=44)
Saved: /content/ObjectDetection/eval/group44.jpg


In [15]:
# Tiled inference for a large class photo
from ultralytics import YOLO
from pathlib import Path
import numpy as np, cv2, json, glob, os

BASE = Path("/content/ObjectDetection")
EVAL = BASE / "eval"
META = BASE / "meta"

REMAP = json.loads((META / "id_remap.json").read_text())
REMAP = {int(k): int(v) for k, v in REMAP.items()}
RAW_FROM_COMPACT = {v: int(k) for k, v in REMAP.items()}
EXPECTED_RAW = sorted(REMAP.keys())

weight_globs = [
    "/content/runs/detect/train*/weights/best.pt",
    "/content/ObjectDetection/runs/detect/train*/weights/best.pt",
]
candidates = []
for g in weight_globs:
    candidates.extend(glob.glob(g))
candidates = sorted(candidates, key=os.path.getmtime)
assert candidates, "No best.pt found. Train first or set best_pt manually."
best_pt = candidates[-1]
print("Using weights:", best_pt)

model = YOLO(best_pt)

def iou_xyxy(a, b):
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    iw, ih = max(0.0, ix2 - ix1), max(0.0, iy2 - iy1)
    inter = iw * ih
    area_a = max(0.0, (a[2]-a[0]) * (a[3]-a[1]))
    area_b = max(0.0, (b[2]-b[0]) * (b[3]-b[1]))
    return inter / (area_a + area_b - inter + 1e-9)

def nms_merge(dets, iou_thr=0.55):
    dets = sorted(dets, key=lambda d: d['conf'], reverse=True)
    keep = []
    for d in dets:
        for k in keep:
            if d['cls'] == k['cls'] and iou_xyxy(d['xyxy'], k['xyxy']) > iou_thr:
                break
        else:
            keep.append(d)
    return keep

def tiled_predict(img_path, tile=1280, overlap=0.25, conf=0.20, iou=0.50):
    im_bgr = cv2.imread(img_path); assert im_bgr is not None, f"Cannot read image: {img_path}"
    im = im_bgr[:, :, ::-1]
    H, W = im.shape[:2]
    step = int(tile * (1 - overlap))

    x_starts = list(range(0, max(W - tile, 0) + 1, step))
    y_starts = list(range(0, max(H - tile, 0) + 1, step))
    if x_starts[-1] != max(W - tile, 0): x_starts.append(max(W - tile, 0))
    if y_starts[-1] != max(H - tile, 0): y_starts.append(max(H - tile, 0))

    all_dets = []
    for y0 in y_starts:
        for x0 in x_starts:
            y1 = min(H, y0 + tile); x1 = min(W, x0 + tile)
            tile_im = im[y0:y1, x0:x1]
            if tile_im.size == 0: continue

            pred = model.predict(
                source=tile_im, imgsz=tile, conf=conf, iou=iou,
                device=0, verbose=False, max_det=300
            )[0]

            if pred.boxes is None: continue
            for b in pred.boxes:
                cls = int(b.cls[0].item())
                confb = float(b.conf[0].item())
                x1b, y1b, x2b, y2b = map(float, b.xyxy[0].tolist())
                all_dets.append({
                    "cls": cls,
                    "conf": confb,
                    "xyxy": [x0 + x1b, y0 + y1b, x0 + x2b, y0 + y2b],
                })

    merged = nms_merge(all_dets, iou_thr=0.55)
    return merged, im_bgr

big_img = str(EVAL / "group44.jpg")
assert os.path.exists(big_img), f"Missing: {big_img}"
dets, vis = tiled_predict(big_img, tile=1280, overlap=0.25, conf=0.20, iou=0.50)

found_raw = []
for d in dets:
    x1, y1, x2, y2 = map(int, d["xyxy"])
    cls = d["cls"]; raw_id = RAW_FROM_COMPACT.get(cls, cls)
    found_raw.append(raw_id)
    cv2.rectangle(vis, (x1, y1), (x2, y2), (0, 255, 0), 2)
    label = f"{raw_id}:{d['conf']:.2f}"
    cv2.putText(vis, label, (x1, max(16, y1 - 6)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2, cv2.LINE_AA)

out_path_img = EVAL / "pred_group44.jpg"
out_path_json = EVAL / "pred_group44.json"
cv2.imwrite(str(out_path_img), vis)

found_set = sorted(set(found_raw))
missing = [rid for rid in EXPECTED_RAW if rid not in found_set]
summary = {
    "image": big_img,
    "weights": best_pt,
    "tile": 1280, "overlap": 0.25, "conf": 0.20, "iou": 0.50,
    "detections_total": len(dets),
    "raw_ids_found_sorted": found_set,
    "raw_ids_missing_sorted": missing,
    "per_detection": dets,
}
with open(out_path_json, "w") as f:
    json.dump(summary, f, indent=2)

print("Saved image :", out_path_img)
print("Saved JSON  :", out_path_json)
print(f"Detections  : {len(dets)} (unique IDs found: {len(found_set)})")
print("Missing IDs :", missing)


Using weights: /content/runs/detect/train/weights/best.pt
Saved image : /content/ObjectDetection/eval/pred_group44.jpg
Saved JSON  : /content/ObjectDetection/eval/pred_group44.json
Detections  : 54 (unique IDs found: 32)
Missing IDs : [228, 487, 800, 2880, 3401, 3782, 7904, 8722, 8968, 9063, 9256, 9319]


In [16]:
# Recall-first tiled inference (v2) for group44.jpg
from ultralytics import YOLO
from pathlib import Path
import numpy as np, cv2, json, glob, os, math

BASE = Path("/content/ObjectDetection")
EVAL = BASE / "eval"
META = BASE / "meta"

# --- id mapping ---
REMAP = json.loads((META / "id_remap.json").read_text())       # raw -> compact
REMAP = {int(k): int(v) for k, v in REMAP.items()}
RAW_FROM_COMPACT = {v: int(k) for k, v in REMAP.items()}
EXPECTED_RAW = sorted(REMAP.keys())

# --- freshest weights ---
cands = []
for g in ("/content/runs/detect/train*/weights/best.pt",
          "/content/ObjectDetection/runs/detect/train*/weights/best.pt"):
    cands += glob.glob(g)
cands = sorted(cands, key=os.path.getmtime)
assert cands, "No best.pt found."
best_pt = cands[-1]
print("Using weights:", best_pt)
model = YOLO(best_pt)

# --- helpers ---
def iou_xyxy(a, b):
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    iw, ih = max(0.0, ix2 - ix1), max(0.0, iy2 - iy1)
    inter = iw * ih
    area_a = max(0.0, (a[2]-a[0]) * (a[3]-a[1]))
    area_b = max(0.0, (b[2]-b[0]) * (b[3]-b[1]))
    return inter / (area_a + area_b - inter + 1e-9)

def nms_merge(dets, iou_thr=0.60):
    dets = sorted(dets, key=lambda d: d['conf'], reverse=True)
    keep = []
    for d in dets:
        for k in keep:
            if d['cls'] == k['cls'] and iou_xyxy(d['xyxy'], k['xyxy']) > iou_thr:
                break
        else:
            keep.append(d)
    return keep

def best_one_per_raw_id(dets):
    best = {}
    for d in dets:
        raw_id = d["raw_id"]
        if raw_id not in best or d["conf"] > best[raw_id]["conf"]:
            best[raw_id] = d
    return list(best.values())

def tiled_predict(img_path, tile=1536, overlap=0.35, conf=0.12, iou=0.50, pad=64):
    im_bgr = cv2.imread(img_path); assert im_bgr is not None, f"Cannot read {img_path}"
    # reflect-pad to protect seam objects
    im_bgr_pad = cv2.copyMakeBorder(im_bgr, pad, pad, pad, pad, cv2.BORDER_REFLECT_101)
    im = im_bgr_pad[:, :, ::-1]
    H, W = im.shape[:2]
    step = int(tile * (1 - overlap))

    x_starts = list(range(0, max(W - tile, 0) + 1, step))
    y_starts = list(range(0, max(H - tile, 0) + 1, step))
    if x_starts[-1] != max(W - tile, 0): x_starts.append(max(W - tile, 0))
    if y_starts[-1] != max(H - tile, 0): y_starts.append(max(H - tile, 0))

    all_dets = []
    for y0 in y_starts:
        for x0 in x_starts:
            y1 = min(H, y0 + tile); x1 = min(W, x0 + tile)
            tile_im = im[y0:y1, x0:x1]
            if tile_im.size == 0: continue

            pred = model.predict(source=tile_im, imgsz=tile, conf=conf, iou=iou,
                                 device=0, verbose=False, max_det=400)[0]
            if pred.boxes is None: continue
            for b in pred.boxes:
                cls = int(b.cls[0].item())                    # compact id
                raw_id = RAW_FROM_COMPACT.get(cls, cls)       # back to raw id for accounting
                confb = float(b.conf[0].item())
                x1b, y1b, x2b, y2b = map(float, b.xyxy[0].tolist())
                # shift back to padded-image coords
                X1, Y1, X2, Y2 = (x0 + x1b, y0 + y1b, x0 + x2b, y0 + y2b)
                # remove padding shift to original image coords
                all_dets.append({
                    "cls": cls,
                    "raw_id": raw_id,
                    "conf": confb,
                    "xyxy": [X1 - pad, Y1 - pad, X2 - pad, Y2 - pad],
                })

    # cross-tile merge then pick best box per raw_id
    merged = nms_merge(all_dets, iou_thr=0.60)
    best = best_one_per_raw_id(merged)
    return best, im_bgr

# --- run ---
big_img = str(EVAL / "group44.jpg")
assert os.path.exists(big_img), f"Missing: {big_img}"
dets, vis = tiled_predict(big_img, tile=1536, overlap=0.35, conf=0.12, iou=0.50, pad=64)

# draw + report
for d in dets:
    x1, y1, x2, y2 = map(int, d["xyxy"])
    raw_id = d["raw_id"]
    cv2.rectangle(vis, (x1,y1), (x2,y2), (0,255,0), 2)
    cv2.putText(vis, f"{raw_id}:{d['conf']:.2f}", (x1, max(16, y1-6)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2, cv2.LINE_AA)

out_img  = EVAL / "pred_group44_v2.jpg"
out_json = EVAL / "pred_group44_v2.json"
cv2.imwrite(str(out_img), vis)

found_raw = sorted({d["raw_id"] for d in dets})
missing = [rid for rid in EXPECTED_RAW if rid not in found_raw]
summary = {
    "image": big_img,
    "weights": best_pt,
    "tile": 1536, "overlap": 0.35, "conf": 0.12, "iou": 0.50, "pad": 64,
    "detections_total": len(dets),
    "raw_ids_found_sorted": found_raw,
    "raw_ids_missing_sorted": missing,
    "per_detection": dets,
}
with open(out_json, "w") as f: json.dump(summary, f, indent=2)

print("Saved:", out_img)
print("Saved:", out_json)
print(f"Unique IDs found: {len(found_raw)} / 44")
print("Missing IDs:", missing)


Using weights: /content/runs/detect/train/weights/best.pt
Saved: /content/ObjectDetection/eval/pred_group44_v2.jpg
Saved: /content/ObjectDetection/eval/pred_group44_v2.json
Unique IDs found: 33 / 44
Missing IDs: [228, 487, 2880, 3401, 3782, 7904, 8656, 8722, 8968, 9063, 9319]
