In [None]:
!pip -q install ultralytics opencv-python pyyaml tqdm scikit-learn pandas numpy matplotlib
!apt -q update && apt -q install -y p7zip-full > /dev/null

Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 https://cli.github.com/packages stable InRelease
Hit:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:5 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Hit:10 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:11 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Reading package lists...
Building dependency tree...
Reading state information...
39 packages can be upgraded. Run 'apt list --upgradable' to see them.
[1;33mW: [0mSkipping acquire of configured file 'main/source/Sources' as reposi

In [None]:
import os
import sys
import io
import json
import csv
import shutil
import zipfile
import subprocess
from pathlib import Path
from typing import List, Tuple, Optional, Dict
import random
import yaml
import math
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
from ultralytics import YOLO

In [None]:
import matplotlib
# Select the Agg backend before pyplot is imported.
# NOTE(review): Agg is a non-interactive backend: figures can be written with
# savefig(), but plt.show() is not expected to render anything inline —
# confirm this is intended for a notebook.
matplotlib.use("Agg")
import matplotlib.pyplot as plt

In [None]:
# --- Paths -------------------------------------------------------------------
ZIP_PATH = Path("/content/ToothNumber_TaskDataset.zip")   # Input archive path
EXTRACT_ROOT = Path("/content/dataset_raw")  # Where the input archive is unpacked
OUT_ROOT = Path("/content/dataset_prepared")              # Final YOLO-format dataset
RUNS_ROOT = Path("/content/runs")                         # Root for all outputs
SUBMISSION_DIR = RUNS_ROOT / "submission_artifacts"       # Consolidated deliverables
PRED_VIS_DIR = RUNS_ROOT / "predict_vis"                  # Raw prediction visuals
POSTPROC_VIS_DIR = RUNS_ROOT / "postproc_vis"             # Post-processed prediction visuals

# --- Run settings ------------------------------------------------------------
SEED = 42  # Passed to set_all_seeds() and used as the KMeans random_state
IMG_EXTS = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")  # Lower-case suffixes treated as images
RECOMMENDED_IMG_SIZE = 640  # Passed as imgsz to training and prediction
EPOCHS = 5  # Passed as epochs to training
BATCH = 16  # Passed as batch to training
CONF_THRESH = 0.25  # Passed as conf to predict()
IOU_THRESH = 0.7  # Passed as iou to predict()
NUM_INLINE_PREVIEWS = 6  # number of prediction images to display inline

# FDI Class list in the exact required order
# The list position of each name is used as its class index in data.yaml.
FDI_CLASS_NAMES = [
    "Canine (13)","Canine (23)","Canine (33)","Canine (43)",
    "Central Incisor (21)","Central Incisor (41)","Central Incisor (31)","Central Incisor (11)",
    "First Molar (16)","First Molar (26)","First Molar (36)","First Molar (46)",
    "First Premolar (14)","First Premolar (34)","First Premolar (44)","First Premolar (24)",
    "Lateral Incisor (22)","Lateral Incisor (32)","Lateral Incisor (42)","Lateral Incisor (12)",
    "Second Molar (17)","Second Molar (27)","Second Molar (37)","Second Molar (47)",
    "Second Premolar (15)","Second Premolar (25)","Second Premolar (35)","Second Premolar (45)",
    "Third Molar (18)","Third Molar (28)","Third Molar (38)","Third Molar (48)"
]

In [None]:
def set_all_seeds(seed: int = 42):
    """Seed Python's ``random``, NumPy and, when installed, PyTorch.

    Args:
        seed: Value handed to every random number generator.

    PyTorch is treated as optional: if it cannot be imported, only the other
    two generators are seeded. A failure while seeding an installed PyTorch is
    reported as a ``RuntimeWarning`` instead of being silently ignored,
    because it means later results are not reproducible.
    """
    random.seed(seed)
    np.random.seed(seed)

    try:
        import torch
    except ImportError:
        # torch is not installed; nothing more to seed.
        return

    try:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    except Exception as exc:
        # Stay best-effort (no crash), but make the problem visible.
        import warnings
        warnings.warn(f"Could not seed PyTorch RNGs: {exc}", RuntimeWarning)

def file_info(p: Path):
    """Return a small description of a path for logging.

    Args:
        p: Path to inspect; it does not have to exist.

    Returns:
        A dict with ``exists`` (bool), ``size`` (bytes, or None when the path
        cannot be stat'ed) and ``suffix`` (lower-cased file extension).
    """
    size_bytes = None
    try:
        size_bytes = p.stat().st_size
    except Exception:
        # Missing or unreadable path: keep size as None.
        pass
    return dict(exists=p.exists(), size=size_bytes, suffix=p.suffix.lower())

def extract_archive(src: Path, dest: Path) -> bool:
    """Populate ``dest`` with the contents of ``src``.

    ``src`` may be a directory (copied as-is) or an archive file. Archives are
    tried with Python's ``zipfile`` first, then with the ``unzip`` and ``7z``
    command-line tools; the first method that succeeds wins.

    Args:
        src: Directory or archive to read.
        dest: Output directory. It is deleted and recreated once ``src`` has
            passed validation.

    Returns:
        True when ``dest`` was populated, False otherwise. The reasons for a
        failure are printed.
    """
    # Validate before touching dest, so a bad source path cannot wipe the
    # result of a previous extraction.
    if not src.exists():
        print("Could not extract the file. Debug:", f"source not found: {src}")
        return False

    # Clearing dest would delete (part of) the source when the paths overlap.
    src_abs, dest_abs = src.resolve(), dest.resolve()
    if dest_abs == src_abs or dest_abs in src_abs.parents or src_abs in dest_abs.parents:
        print("Could not extract the file. Debug:", f"source {src} and destination {dest} overlap")
        return False

    if dest.exists():
        shutil.rmtree(dest)
    dest.mkdir(parents=True, exist_ok=True)

    if src.is_dir():
        shutil.copytree(src, dest, dirs_exist_ok=True)
        return True

    err_msgs = []
    if src.is_file():
        # 1) Pure-Python ZIP extraction.
        try:
            if zipfile.is_zipfile(src):
                with zipfile.ZipFile(src, 'r') as zf:
                    zf.extractall(dest)
                return True
            err_msgs.append("Python zipfile: not recognized as ZIP.")
        except Exception as e:
            err_msgs.append(f"Python zipfile error: {e}")

        # 2) External tools, in order. Only 7z output is silenced.
        quiet = {"stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL}
        fallbacks = (
            ("unzip", ["unzip", "-q", "-o", str(src), "-d", str(dest)], {}),
            ("7z", ["7z", "x", "-y", str(src), f"-o{dest}"], quiet),
        )
        for tool, cmd, call_kwargs in fallbacks:
            try:
                code = subprocess.call(cmd, **call_kwargs)
            except Exception as e:
                # Typically the tool is not installed.
                err_msgs.append(f"{tool} error: {e}")
                continue
            if code == 0:
                return True
            err_msgs.append(f"{tool} exit code {code}")
    else:
        err_msgs.append("source is neither a regular file nor a directory")

    print("Could not extract the file. Debug:", "; ".join(err_msgs))
    return False

def find_best_images_dir(root: Path) -> Path:
    """Pick the ``images`` folder under ``root`` that holds the most image files.

    Every path named ``images`` below ``root`` is a candidate, and so is
    ``root`` itself when its own name is "images" (case-insensitive). Only the
    direct children of a candidate are counted, using ``IMG_EXTS``.

    Args:
        root: Directory to search.

    Returns:
        The candidate with the highest image count; on a tie the one found first.

    Raises:
        FileNotFoundError: If no candidate contains at least one image.
    """
    def count_images(folder: Path) -> int:
        return sum(1 for entry in folder.glob("*") if entry.suffix.lower() in IMG_EXTS)

    scored = [(count_images(folder), folder) for folder in root.rglob("images")]
    if root.name.lower() == "images":
        scored.append((count_images(root), root))

    non_empty = [item for item in scored if item[0] > 0]
    if not non_empty:
        raise FileNotFoundError("Couldn't find an 'images' folder that contains any images.")
    # max() returns the first of several equal maxima.
    return max(non_empty, key=lambda item: item[0])[1]

def detect_labels_dir(images_dir: Path) -> Optional[Path]:
    """Return the ``labels`` path that sits next to ``images_dir``, if it exists.

    Args:
        images_dir: Directory containing the images.

    Returns:
        ``images_dir.parent / "labels"`` when that path exists, otherwise None.
    """
    sibling = images_dir.parent / "labels"
    return sibling if sibling.exists() else None

def label_for_image(img_path: Path, labels_dir: Optional[Path]) -> Optional[Path]:
    """Locate the ``.txt`` annotation that belongs to an image.

    A ``.txt`` file beside the image (same stem) takes priority; otherwise
    ``labels_dir/<stem>.txt`` is used when ``labels_dir`` is given.

    Args:
        img_path: Path of the image.
        labels_dir: Optional separate directory holding label files.

    Returns:
        The first existing candidate, or None when there is none.
    """
    candidates = [img_path.with_suffix(".txt")]
    if labels_dir is not None:
        candidates.append(labels_dir / f"{img_path.stem}.txt")
    return next((option for option in candidates if option.exists()), None)

def ensure_clean_dir(p: Path):
    """Make ``p`` an existing, empty directory.

    Whatever is currently at ``p`` is removed first: a directory tree is
    deleted recursively, while a regular file or symlink is unlinked
    (``shutil.rmtree`` refuses both). Missing parents are created.

    Args:
        p: Directory path to (re)create.
    """
    if p.is_symlink() or p.is_file():
        p.unlink()
    elif p.exists():
        shutil.rmtree(p)
    p.mkdir(parents=True, exist_ok=True)

def write_yaml(pth: Path, data: dict):
    """Serialise ``data`` to a UTF-8 YAML file at ``pth``.

    Key order is kept as given (``sort_keys=False``) and non-ASCII text is
    written unescaped (``allow_unicode=True``).

    Args:
        pth: Destination file; overwritten if it exists.
        data: Mapping to dump with ``yaml.safe_dump``.
    """
    document = yaml.safe_dump(data, sort_keys=False, allow_unicode=True)
    pth.write_text(document, encoding="utf-8")

def safe_copy(src: Path, dst: Path):
    """Copy ``src`` to ``dst``, creating the destination folder when needed.

    Args:
        src: File to copy.
        dst: Full destination file path.
    """
    target_dir = dst.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    # copy2 also carries over file metadata such as timestamps.
    shutil.copy2(src, dst)

def yolo_save_dir_to_best(weights_dir: Path) -> Path:
    """Return the path ``<weights_dir>/weights/best.pt``.

    Args:
        weights_dir: Directory expected to contain a ``weights`` sub-folder.

    Returns:
        Path to ``best.pt``.

    Raises:
        FileNotFoundError: If that file does not exist.
    """
    checkpoint = weights_dir.joinpath("weights", "best.pt")
    if checkpoint.exists():
        return checkpoint
    raise FileNotFoundError(f"best.pt not found under {weights_dir}")

def write_metrics_csv_json(metrics_dict: dict, out_csv: Path, out_json: Path):
    """Write a one-row summary CSV and a full JSON dump of evaluation metrics.

    The CSV always has the columns ``precision``, ``recall``, ``map50`` and
    ``map``. Each column is filled from the first matching key in
    ``metrics_dict``: the plain column name, or the name Ultralytics uses in
    a detection ``results_dict`` (``metrics/precision(B)``,
    ``metrics/recall(B)``, ``metrics/mAP50(B)``, ``metrics/mAP50-95(B)``).
    Looking up only the plain names left every CSV cell empty for Ultralytics
    results. A column with no matching key is left empty.

    Args:
        metrics_dict: Metric name -> value mapping.
        out_csv: Destination of the summary CSV.
        out_json: Destination of the JSON dump of the whole ``metrics_dict``.
    """
    # CSV column -> accepted source keys, in priority order.
    column_aliases = {
        "precision": ("precision", "metrics/precision(B)"),
        "recall": ("recall", "metrics/recall(B)"),
        "map50": ("map50", "metrics/mAP50(B)"),
        "map": ("map", "metrics/mAP50-95(B)"),
    }
    keys_of_interest = list(column_aliases)
    row = {
        column: next((metrics_dict[key] for key in aliases if key in metrics_dict), None)
        for column, aliases in column_aliases.items()
    }

    out_csv.parent.mkdir(parents=True, exist_ok=True)
    out_json.parent.mkdir(parents=True, exist_ok=True)
    with open(out_csv, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=keys_of_interest)
        w.writeheader()
        w.writerow(row)
    with open(out_json, "w", encoding="utf-8") as f:
        # default=float converts numeric scalars that json cannot encode
        # natively (e.g. NumPy float32) instead of failing.
        json.dump(metrics_dict, f, indent=2, default=float)

def list_and_copy_if_exists(src_dir: Path, filenames: List[str], dest_dir: Path):
    """Copy whichever of ``filenames`` exist in ``src_dir`` into ``dest_dir``.

    ``dest_dir`` is created even when nothing is copied. Names that are not
    present in ``src_dir`` are skipped silently.

    Args:
        src_dir: Directory to look in.
        filenames: File names relative to ``src_dir``.
        dest_dir: Directory receiving the copies under the same names.

    Returns:
        List of the source paths that were copied, in ``filenames`` order.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)
    copied = []
    for filename in filenames:
        source = src_dir / filename
        if not source.exists():
            continue
        safe_copy(source, dest_dir / filename)
        copied.append(source)
    return copied

In [None]:
# Extract dataset

# Log what is being read (existence, size, suffix) before unpacking.
print("Input:", ZIP_PATH, file_info(ZIP_PATH))
# extract_archive() recreates EXTRACT_ROOT and returns False on failure.
ok = extract_archive(ZIP_PATH, EXTRACT_ROOT)
if not ok:
    # Stop the cell here; everything below needs the extracted files.
    raise SystemExit("Extraction failed. Please provide a valid dataset archive or directory.")

print("Extracted to:", EXTRACT_ROOT)
# Show the first 100 lines of the extracted tree as a sanity check.
# NOTE(review): the path is hard-coded and duplicates EXTRACT_ROOT; keep the two in sync.
!ls -R /content/dataset_raw | head -100

# Pair images and labels, split 80/10/10, build data.yaml

set_all_seeds(SEED)

images_dir = find_best_images_dir(EXTRACT_ROOT)
labels_dir = detect_labels_dir(images_dir)
print("Using images dir:", images_dir)
print("Labels dir:", labels_dir if labels_dir else "Expecting .txt annotations next to images.")

# Start from an empty output tree so files from earlier runs cannot leak in.
ensure_clean_dir(OUT_ROOT)
(OUT_ROOT/"images").mkdir(parents=True, exist_ok=True)
(OUT_ROOT/"labels").mkdir(parents=True, exist_ok=True)

# Sort before shuffling: glob() order depends on the filesystem, so a seeded
# shuffle of an unsorted list would not give the same split on every run.
all_imgs = sorted(p for p in images_dir.glob("*") if p.suffix.lower() in IMG_EXTS)
pairs, missing = [], 0
for img in all_imgs:
    lbl = label_for_image(img, labels_dir)
    if lbl:
        pairs.append((img, lbl))
    else:
        missing += 1
print(f"Found {len(pairs)} image/label pairs. Missing labels for {missing} images.")

if not pairs:
    # Fail here with a clear message instead of later, inside training.
    raise SystemExit("No image/label pairs found; cannot build the dataset.")

random.shuffle(pairs)
n = len(pairs)
# 80% train, 10% val; the test split takes the remainder, including what the
# int() truncation leaves over.
n_train, n_val = int(0.8 * n), int(0.1 * n)
splits = {
    "train": pairs[:n_train],
    "val": pairs[n_train:n_train + n_val],
    "test": pairs[n_train + n_val:]
}

for split, items in splits.items():
    (OUT_ROOT/"images"/split).mkdir(parents=True, exist_ok=True)
    (OUT_ROOT/"labels"/split).mkdir(parents=True, exist_ok=True)
    for im, lbl in tqdm(items, desc=f"Copying {split}", unit="file"):
        safe_copy(im, OUT_ROOT/"images"/split/im.name)
        # Name the label after the image stem, wherever the label came from.
        safe_copy(lbl, OUT_ROOT/"labels"/split/f"{im.stem}.txt")

data_yaml = {
    "train": str(OUT_ROOT/"images"/"train"),
    "val": str(OUT_ROOT/"images"/"val"),
    "test": str(OUT_ROOT/"images"/"test"),
    # Class index -> class name, in FDI_CLASS_NAMES order.
    "names": dict(enumerate(FDI_CLASS_NAMES))
}
write_yaml(OUT_ROOT/"data.yaml", data_yaml)
print("Prepared dataset at:", OUT_ROOT)
print("data.yaml:", OUT_ROOT/"data.yaml")

Input: /content/ToothNumber_TaskDataset.zip {'exists': True, 'size': 42629954, 'suffix': '.zip'}
Extracted to: /content/dataset_raw
/content/dataset_raw:
images
labels

/content/dataset_raw/images:
00147087-20240918-120248027.jpg
0337fb14-20240822-124747143.jpg
05473f26-20240924-154033777.jpg
05ff01fa-20250108-115407401.jpg
061339f1-20240911-110938655.jpg
069750f6-20240914-102814322.jpg
084c3562-20240827-154718191.jpg
0ba65172-20240821-105924223.jpg
0c4cebbe-20240813-144203674.jpg
0fcae64f-20241217-121556078.jpg
0fd502a1-20250123-111857443.jpg
103dd85b-20240831-101746682.jpg
124e696d-20240914-105651782.jpg
1459fc45-20240831-113821806.jpg
16a28cd9-20240723-113852192.jpg
18fd6ab8-20250415-143542497.jpg
1b000dc7-20240813-115604759.jpg
1c50bda9-20241231-145320490.jpg
1fc5e51d-20241219-142523942.jpg
22464331-20240919-102844605.jpg
23f1d012-20240628-115132889.jpg
26f91eeb-20240903-150428112.jpg
271041ce-20250421-112436631.jpg
2725830c-20240829-130426513.jpg
281350f8-20240813-145334040.jpg
28

Copying train: 100%|██████████| 397/397 [00:00<00:00, 2497.64file/s]
Copying val: 100%|██████████| 49/49 [00:00<00:00, 2444.41file/s]
Copying test: 100%|██████████| 51/51 [00:00<00:00, 2358.71file/s]

Prepared dataset at: /content/dataset_prepared
data.yaml: /content/dataset_prepared/data.yaml





In [None]:
# Train YOLOv8

# All run outputs live under RUNS_ROOT; start from an empty tree.
ensure_clean_dir(RUNS_ROOT)

# Keyword arguments for model.train(), gathered in one place.
train_options = {
    "data": str(OUT_ROOT / "data.yaml"),
    "imgsz": RECOMMENDED_IMG_SIZE,
    "epochs": EPOCHS,
    "batch": BATCH,
    "project": str(RUNS_ROOT),
    "name": "train",
    "verbose": True,
}

model = YOLO("yolov8s.pt")
train_results = model.train(**train_options)

# The run directory reported by the training results holds weights/best.pt.
train_save_dir = Path(train_results.save_dir)
best_pt = yolo_save_dir_to_best(train_save_dir)
print("Training run dir:", train_save_dir)
print("Best weights:", best_pt)

Ultralytics 8.3.189 🚀 Python-3.12.11 torch-2.8.0+cu126 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/dataset_prepared/data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=5, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=1

In [None]:
# Evaluate on val and test, save metrics and confusion matrices

ensure_clean_dir(SUBMISSION_DIR)

# Plot files to collect from each evaluation run. Names a run did not produce
# are skipped by list_and_copy_if_exists().
val_artifacts = [
    "confusion_matrix.png",
    "confusion_matrix_normalized.png",
    "P_curve.png",
    "R_curve.png",
    "PR_curve.png",
    "F1_curve.png",
    # NOTE(review): newer Ultralytics releases appear to save detection curves
    # with a "Box" prefix; both spellings are listed — verify against the
    # installed version.
    "BoxP_curve.png",
    "BoxR_curve.png",
    "BoxPR_curve.png",
    "BoxF1_curve.png",
    "results.png",
    "labels_correlogram.jpg",
    "labels.jpg"
]


def evaluate_split(split: str):
    """Validate ``best_pt`` on one dataset split and collect its outputs.

    The run is saved under ``RUNS_ROOT/<split>_eval``. Metrics are printed and
    written to ``SUBMISSION_DIR/metrics_<split>.csv|json``; the plot files
    named in ``val_artifacts`` are copied to ``SUBMISSION_DIR/<split>_plots``.

    Args:
        split: Split name from data.yaml ("val" or "test").

    Returns:
        Tuple of (metrics object returned by ``val()``, run directory as Path).
    """
    metrics = YOLO(str(best_pt)).val(
        data=str(OUT_ROOT/"data.yaml"),
        split=split,
        plots=True,
        save_json=True,
        project=str(RUNS_ROOT),
        name=f"{split}_eval"
    )
    run_dir = Path(metrics.save_dir)
    print(f"{split.upper()}:", metrics.results_dict)

    write_metrics_csv_json(
        metrics.results_dict,
        SUBMISSION_DIR / f"metrics_{split}.csv",
        SUBMISSION_DIR / f"metrics_{split}.json"
    )
    list_and_copy_if_exists(run_dir, val_artifacts, SUBMISSION_DIR / f"{split}_plots")
    return metrics, run_dir


# Validation
val_metrics, val_dir = evaluate_split("val")

# Test
test_metrics, test_dir = evaluate_split("test")

print("Validation plots dir:", val_dir)
print("Test plots dir:", test_dir)
print("Submission artifacts dir:", SUBMISSION_DIR)


Ultralytics 8.3.189 🚀 Python-3.12.11 torch-2.8.0+cu126 CPU (Intel Xeon 2.20GHz)
Model summary (fused): 72 layers, 11,137,968 parameters, 0 gradients, 28.5 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1383.4±295.9 MB/s, size: 89.0 KB)
[K[34m[1mval: [0mScanning /content/dataset_prepared/labels/val.cache... 49 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 49/49 57344.0it/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 4/4 0.10it/s 40.4s
                   all         49       1461       0.32       0.74      0.401      0.264
           Canine (13)         49         49      0.309      0.755      0.332      0.223
           Canine (23)         48         48       0.25      0.236      0.279      0.173
           Canine (33)         49         49      0.327      0.673      0.448      0.272
           Canine (43)         48         48      0.272      0.729      0.307       0.21
  Central In

In [None]:
# Sample predictions on test images (save and display inline)

ensure_clean_dir(PRED_VIS_DIR)
pred_results = YOLO(str(best_pt)).predict(
    source=str(OUT_ROOT/"images"/"test"),
    save=True,
    conf=CONF_THRESH,
    iou=IOU_THRESH,
    imgsz=RECOMMENDED_IMG_SIZE,
    project=str(PRED_VIS_DIR.parent),
    name=PRED_VIS_DIR.name,
    # ensure_clean_dir() has just created PRED_VIS_DIR. Without exist_ok=True
    # Ultralytics treats the name as taken and writes to an incremented folder
    # (e.g. "predict_vis2"), so nothing would land in PRED_VIS_DIR.
    exist_ok=True
)
print("Predictions saved under:", PRED_VIS_DIR)


image 1/51 /content/dataset_prepared/images/test/069750f6-20240914-102814322.jpg: 640x640 1 Central Incisor (21), 2 Central Incisor (41)s, 2 First Molar (36)s, 2 Lateral Incisor (22)s, 3 Lateral Incisor (32)s, 3 Lateral Incisor (42)s, 1 Second Molar (17), 1 Second Molar (27), 1 Third Molar (28), 1 Third Molar (48), 572.2ms
image 2/51 /content/dataset_prepared/images/test/084c3562-20240827-154718191.jpg: 640x640 3 Canine (13)s, 1 Canine (33), 3 Central Incisor (21)s, 4 Central Incisor (41)s, 3 Central Incisor (31)s, 1 Central Incisor (11), 2 First Molar (16)s, 1 First Molar (26), 1 First Molar (36), 1 First Molar (46), 2 Lateral Incisor (22)s, 1 Second Molar (17), 1 Second Molar (27), 2 Second Molar (37)s, 1 Second Molar (47), 1 Second Premolar (15), 1 Second Premolar (35), 2 Third Molar (48)s, 545.6ms
image 3/51 /content/dataset_prepared/images/test/22464331-20240919-102844605.jpg: 640x640 1 Canine (33), 3 Central Incisor (21)s, 3 Central Incisor (41)s, 2 Central Incisor (31)s, 1 Cent

In [None]:
# Inline display of a few predictions using Results.plot()
to_show = pred_results[:NUM_INLINE_PREVIEWS] if isinstance(pred_results, list) else []
fig_cols = 3
fig_rows = max(1, int(np.ceil(len(to_show) / fig_cols)))
if len(to_show) > 0:
    fig = plt.figure(figsize=(fig_cols * 5, fig_rows * 4))
    for i, res in enumerate(to_show):
        im_bgr = res.plot()  # BGR numpy array
        im_rgb = im_bgr[..., ::-1]  # reverse the channel axis: BGR -> RGB
        ax = fig.add_subplot(fig_rows, fig_cols, i + 1)
        ax.imshow(im_rgb)
        ax.axis("off")
        ax.set_title(Path(res.path).name if hasattr(res, "path") else f"pred_{i}")
    fig.tight_layout()
    # plt.show() renders nothing when a non-interactive backend such as Agg is
    # active. display() is the IPython built-in and embeds the figure in the
    # cell output whatever the backend.
    display(fig)
    # Close so the figure is not shown a second time and its memory is freed.
    plt.close(fig)

In [None]:
# Also save a grid preview image
preview_path = PRED_VIS_DIR / "preview_grid.png"
if to_show:
    grid_fig = plt.figure(figsize=(fig_cols * 5, fig_rows * 4))
    for i, res in enumerate(to_show):
        # Results.plot() is treated as BGR here; flip the channels for matplotlib.
        im_rgb = res.plot()[..., ::-1]
        panel = grid_fig.add_subplot(fig_rows, fig_cols, i + 1)
        panel.imshow(im_rgb)
        panel.axis("off")
        panel.set_title(Path(res.path).name if hasattr(res, "path") else f"pred_{i}")
    grid_fig.tight_layout()
    grid_fig.savefig(preview_path, dpi=150)
    plt.close(grid_fig)
    print("Saved inline preview grid to:", preview_path)

Saved inline preview grid to: /content/runs/predict_vis/preview_grid.png


In [None]:
# Optional Post-Processing for anatomical consistency

# Class index -> FDI tooth code, taken from the text between the last "(" and
# the closing ")" of each class name (e.g. "Canine (13)" -> "13").
CLASS_ID_TO_FDI = {i: name.split("(")[-1].strip(")") for i, name in enumerate(FDI_CLASS_NAMES)}

def load_yolo_labels(label_path: Path) -> np.ndarray:
    """Read a whitespace-separated label file into an ``(N, 5)`` float array.

    Only the first five fields of each line are kept (cls, cx, cy, w, h);
    lines with fewer than five fields are ignored.

    Args:
        label_path: Text file to read.

    Returns:
        Array with one row per kept line, or an empty ``(0, 5)`` array when
        the file is missing or yields no rows.
    """
    no_boxes = np.zeros((0, 5), dtype=float)
    if not label_path.exists():
        return no_boxes

    with open(label_path, "r") as handle:
        tokenised = (line.strip().split() for line in handle)
        rows = [[float(tok) for tok in fields[:5]] for fields in tokenised if len(fields) >= 5]

    return np.array(rows, dtype=float) if rows else no_boxes  # cls, cx, cy, w, h

def save_yolo_labels(label_path: Path, arr: np.ndarray):
    """Write label rows as ``cls cx cy w h`` text lines.

    The class is written as an integer and the four box values with six
    decimals. The file is overwritten; an empty ``arr`` gives an empty file.

    Args:
        label_path: Destination text file (its folder must already exist).
        arr: Rows whose first five entries are cls, cx, cy, w, h.
    """
    line_template = "{} {:.6f} {:.6f} {:.6f} {:.6f}\n"
    with open(label_path, "w") as handle:
        for record in arr:
            handle.write(line_template.format(int(record[0]), record[1], record[2], record[3], record[4]))

def kmeans_split_y(values: np.ndarray) -> np.ndarray:
    """Split 1-D values into two groups and return a 0/1 label per value.

    scikit-learn's 2-cluster KMeans is tried first. If anything in that
    attempt raises (import problem, too few samples, ...), the values are
    split at their median instead: 1 for values above it, 0 otherwise.

    Args:
        values: 1-D array of numbers.

    Returns:
        Integer array of the same length holding the group labels.
    """
    try:
        from sklearn.cluster import KMeans
        clusterer = KMeans(n_clusters=2, random_state=SEED, n_init=10)
        column = values.reshape(-1, 1)  # KMeans expects 2-D input
        return clusterer.fit_predict(column)
    except Exception:
        cutoff = np.median(values)
        above = values > cutoff
        return above.astype(int)

def assign_quadrants_and_sort(boxes: np.ndarray, midline: float = 0.5) -> Dict[str, List[int]]:
    """Group boxes into the four quadrants and order each from the midline outward.

    Boxes are first split into two arches by clustering their ``cy`` values;
    the cluster with the smaller mean ``cy`` (higher in the image) is the
    upper arch. Each arch is then split at ``midline`` on ``cx``.

    Inside a quadrant the indices are sorted by horizontal distance from the
    midline. ``expected_fdi_sequence_for_quadrant`` numbers teeth 1..8, so
    position 0 must be the box nearest the midline on both sides of the
    image. Sorting by plain ascending ``cx`` put the outermost box first in
    the left-hand quadrants.

    NOTE(review): image-left is mapped to quadrants "2"/"3" and image-right to
    "1"/"4". Whether that matches the radiographs' left/right orientation
    cannot be determined from this code — confirm against the dataset.

    Args:
        boxes: ``(N, 5)`` array of cls, cx, cy, w, h rows.
        midline: ``cx`` value separating the image's left and right halves.

    Returns:
        ``{}`` for empty input, otherwise a dict mapping "1".."4" to lists of
        row indices into ``boxes``.
    """
    n_boxes = boxes.shape[0]
    if n_boxes == 0:
        return {}

    cy = boxes[:, 2]
    arch_labels = kmeans_split_y(cy)
    # An empty cluster gets a huge mean so it can never be chosen as "upper".
    cluster_means = [cy[arch_labels == k].mean() if np.any(arch_labels == k) else 1e9 for k in [0, 1]]
    upper_cluster = int(np.argmin(cluster_means))

    quadrants = {"1": [], "2": [], "3": [], "4": []}
    for idx in range(n_boxes):
        on_left = boxes[idx, 1] < midline
        if arch_labels[idx] == upper_cluster:
            quadrants["2" if on_left else "1"].append(idx)
        else:
            quadrants["3" if on_left else "4"].append(idx)

    for members in quadrants.values():
        # Nearest to the midline first, on either side of the image.
        members.sort(key=lambda i: abs(boxes[i, 1] - midline))
    return quadrants

def expected_fdi_sequence_for_quadrant(q: str) -> List[str]:
    """List the eight FDI codes of a quadrant in ascending order.

    Args:
        q: Quadrant id, one of "1", "2", "3" or "4".

    Returns:
        Codes as strings, e.g. ``["11", ..., "18"]`` for quadrant "1".

    Raises:
        KeyError: If ``q`` is not a known quadrant id.
    """
    quadrant_base = {"1": 10, "2": 20, "3": 30, "4": 40}
    start = quadrant_base[q]
    return [str(start + position) for position in range(1, 9)]

def map_fdi_to_class_index(fdi_code: str) -> Optional[int]:
    """Find the class index whose name ends with the given FDI code.

    The code of each entry in ``FDI_CLASS_NAMES`` is the text between its
    last "(" and the closing ")".

    Args:
        fdi_code: FDI code as a string, e.g. "13".

    Returns:
        Index of the first matching class name, or None when none matches.
    """
    codes = (name.split("(")[-1].strip(")") for name in FDI_CLASS_NAMES)
    return next((index for index, code in enumerate(codes) if code == fdi_code), None)

def postprocess_image(img_path: Path, in_label_path: Path, out_label_path: Path, out_vis_path: Path):
    """Relabel one image's boxes by position, then save labels and a preview.

    Boxes read from ``in_label_path`` are grouped by
    ``assign_quadrants_and_sort``. Within each quadrant, the k-th box in the
    returned order receives the class of the k-th code from
    ``expected_fdi_sequence_for_quadrant``. Boxes beyond the length of that
    sequence, or whose code has no class, keep their original class.

    Nothing is written when the label file yields no boxes. The preview is
    skipped when the image cannot be read; the relabelled label file has
    already been written at that point.

    Args:
        img_path: Image the labels belong to.
        in_label_path: Label file to read.
        out_label_path: Destination for the relabelled rows.
        out_vis_path: Destination for the annotated preview image.
    """
    original = load_yolo_labels(in_label_path)  # (N,5)
    if len(original) == 0:
        return

    relabelled = original.copy()
    for quadrant, members in assign_quadrants_and_sort(original).items():
        wanted_codes = expected_fdi_sequence_for_quadrant(quadrant)
        # zip() stops at the shorter list, so surplus boxes are left untouched.
        for box_idx, code in zip(members, wanted_codes):
            class_idx = map_fdi_to_class_index(fdi_code=code)
            if class_idx is not None:
                relabelled[box_idx, 0] = class_idx

    save_yolo_labels(out_label_path, relabelled)

    image = cv2.imread(str(img_path))
    if image is None:
        return
    height, width = image.shape[:2]
    canvas = image.copy()
    for cls_id, cx, cy, bw, bh in relabelled:
        # Normalised centre/size -> pixel corner coordinates.
        left = int((cx - bw / 2) * width)
        top = int((cy - bh / 2) * height)
        right = int((cx + bw / 2) * width)
        bottom = int((cy + bh / 2) * height)
        caption = FDI_CLASS_NAMES[int(cls_id)].split("(")[-1].strip(")")
        cv2.rectangle(canvas, (left, top), (right, bottom), (0, 255, 0), 2)
        # Keep the caption at least 15 px from the top edge of the image.
        cv2.putText(canvas, caption, (left, max(15, top - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (10, 200, 10), 2, cv2.LINE_AA)

    out_vis_path.parent.mkdir(parents=True, exist_ok=True)
    cv2.imwrite(str(out_vis_path), canvas)

In [None]:
# Ensure dirs
ensure_clean_dir(POSTPROC_VIS_DIR)

# Remove label files left by an earlier run of this cell.
# NOTE(review): Ultralytics' save_txt is understood to append to existing
# files, which would duplicate boxes on a re-run — confirm.
shutil.rmtree(PRED_VIS_DIR / "labels", ignore_errors=True)

# Re-run predictions with label TXT saving for post-processing
preds_with_txt = YOLO(str(best_pt)).predict(
    source=str(OUT_ROOT/"images"/"test"),
    save=True,
    save_txt=True,
    conf=CONF_THRESH,
    iou=IOU_THRESH,
    imgsz=RECOMMENDED_IMG_SIZE,
    project=str(PRED_VIS_DIR.parent),
    name=PRED_VIS_DIR.name,
    # PRED_VIS_DIR already exists at this point. Without exist_ok=True the run
    # goes to an incremented folder (e.g. "predict_vis2"), PRED_VIS_DIR/labels
    # is never created, and post-processing finds no label files.
    exist_ok=True
)
labels_pred_dir = PRED_VIS_DIR / "labels"

# Post-process predictions
POSTPROC_LABELS_DIR = RUNS_ROOT / "postproc_labels"
ensure_clean_dir(POSTPROC_LABELS_DIR)

test_images_dir = OUT_ROOT / "images" / "test"
for img_path in tqdm(sorted(test_images_dir.glob("*")), desc="Post-processing", unit="img"):
    stem = img_path.stem
    label_path = labels_pred_dir / f"{stem}.txt"
    # Only image files that have a predicted label file are processed.
    if img_path.suffix.lower() in IMG_EXTS and label_path.exists():
        postprocess_image(
            img_path,
            label_path,
            POSTPROC_LABELS_DIR / f"{stem}.txt",
            POSTPROC_VIS_DIR / img_path.name,
        )

print("Post-processed visuals saved to:", POSTPROC_VIS_DIR)
print("Post-processed labels saved to:", POSTPROC_LABELS_DIR)

# Inline display of a few post-processed images for verification
postproc_images = sorted([p for p in POSTPROC_VIS_DIR.glob("*") if p.suffix.lower() in IMG_EXTS])[:NUM_INLINE_PREVIEWS]
if len(postproc_images) > 0:
    fig_cols = 3
    fig_rows = max(1, int(np.ceil(len(postproc_images) / fig_cols)))
    fig = plt.figure(figsize=(fig_cols * 5, fig_rows * 4))
    for i, p in enumerate(postproc_images):
        bgr = cv2.imread(str(p))
        if bgr is None:
            # Unreadable file: leave its grid slot empty instead of crashing
            # in cvtColor.
            continue
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        ax = fig.add_subplot(fig_rows, fig_cols, i + 1)
        ax.imshow(rgb)
        ax.axis("off")
        ax.set_title(p.name)
    fig.tight_layout()
    # plt.show() renders nothing when a non-interactive backend such as Agg is
    # active. display() is the IPython built-in and embeds the figure in the
    # cell output whatever the backend.
    display(fig)
    # Close so the figure is not shown a second time and its memory is freed.
    plt.close(fig)


image 1/51 /content/dataset_prepared/images/test/069750f6-20240914-102814322.jpg: 640x640 1 Central Incisor (21), 2 Central Incisor (41)s, 2 First Molar (36)s, 2 Lateral Incisor (22)s, 3 Lateral Incisor (32)s, 3 Lateral Incisor (42)s, 1 Second Molar (17), 1 Second Molar (27), 1 Third Molar (28), 1 Third Molar (48), 562.8ms
image 2/51 /content/dataset_prepared/images/test/084c3562-20240827-154718191.jpg: 640x640 3 Canine (13)s, 1 Canine (33), 3 Central Incisor (21)s, 4 Central Incisor (41)s, 3 Central Incisor (31)s, 1 Central Incisor (11), 2 First Molar (16)s, 1 First Molar (26), 1 First Molar (36), 1 First Molar (46), 2 Lateral Incisor (22)s, 1 Second Molar (17), 1 Second Molar (27), 2 Second Molar (37)s, 1 Second Molar (47), 1 Second Premolar (15), 1 Second Premolar (35), 2 Third Molar (48)s, 560.1ms
image 3/51 /content/dataset_prepared/images/test/22464331-20240919-102844605.jpg: 640x640 1 Canine (33), 3 Central Incisor (21)s, 3 Central Incisor (41)s, 2 Central Incisor (31)s, 1 Cent

Post-processing: 100%|██████████| 51/51 [00:00<00:00, 17078.60img/s]

Post-processed visuals saved to: /content/runs/postproc_vis
Post-processed labels saved to: /content/runs/postproc_labels





In [None]:

# Manifest and final notes

# One JSON document recording where every artefact of this run lives.
manifest = dict(
    dataset=dict(
        raw=str(EXTRACT_ROOT),
        prepared=str(OUT_ROOT),
        data_yaml=str(OUT_ROOT/"data.yaml"),
        num_pairs=len(pairs),
        seed=SEED,
    ),
    training=dict(
        run_dir=str(train_save_dir),
        best_weights=str(best_pt),
        epochs=EPOCHS,
        batch=BATCH,
        imgsz=RECOMMENDED_IMG_SIZE,
    ),
    evaluation=dict(
        val_dir=str(val_dir),
        test_dir=str(test_dir),
        submission_artifacts=str(SUBMISSION_DIR),
    ),
    predictions=dict(
        raw_vis_dir=str(PRED_VIS_DIR),
        postproc_vis_dir=str(POSTPROC_VIS_DIR),
        postproc_labels_dir=str(POSTPROC_LABELS_DIR),
    ),
    # Integer class indices become string keys in the JSON output.
    classes=dict(enumerate(FDI_CLASS_NAMES)),
)
SUBMISSION_DIR.mkdir(parents=True, exist_ok=True)
(SUBMISSION_DIR/"manifest.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")

metric_files = (
    SUBMISSION_DIR/"metrics_val.csv",
    SUBMISSION_DIR/"metrics_val.json",
    SUBMISSION_DIR/"metrics_test.csv",
    SUBMISSION_DIR/"metrics_test.json",
)
plot_dirs = (SUBMISSION_DIR/"val_plots", SUBMISSION_DIR/"test_plots")

print("Completed. Key outputs:")
print(" - Best weights:", best_pt)
print(" - Metrics CSV/JSON:", *metric_files)
print(" - Confusion matrices and plots:", *plot_dirs)
print(" - Raw predictions:", PRED_VIS_DIR)
print(" - Post-processed predictions:", POSTPROC_VIS_DIR)
print(" - Manifest:", SUBMISSION_DIR/"manifest.json")


Completed. Key outputs:
 - Best weights: /content/runs/train/weights/best.pt
 - Metrics CSV/JSON: /content/runs/submission_artifacts/metrics_val.csv /content/runs/submission_artifacts/metrics_val.json /content/runs/submission_artifacts/metrics_test.csv /content/runs/submission_artifacts/metrics_test.json
 - Confusion matrices and plots: /content/runs/submission_artifacts/val_plots /content/runs/submission_artifacts/test_plots
 - Raw predictions: /content/runs/predict_vis
 - Post-processed predictions: /content/runs/postproc_vis
 - Manifest: /content/runs/submission_artifacts/manifest.json
