In [1]:
import json
from pathlib import Path
from PIL import Image
import shutil

# Path to your dataset root
root = Path("/home/tzh005/ECE253Project/detecting-the-unexpected/datasets/dataset_RoadAnomaly")

frames_dir = root / "frames"
json_path = root / "frame_list.json"

# Load frame list
frame_list = json.loads(json_path.read_text())

new_frame_list = []
extensions = [".jpg", ".jpeg", ".png", ".JPG", ".JPEG", ".PNG"]

for old_name in frame_list:
    old_path = frames_dir / old_name
    stem = old_path.stem            # e.g., "IMG_4088"
    new_name = stem + ".webp"
    new_path = frames_dir / new_name

    # Convert image → webp
    if old_path.is_file():
        print(f"Converting {old_name} → {new_name}")
        img = Image.open(old_path).convert("RGB")
        img.save(new_path, "webp", quality=95)

        # Optionally delete original (uncomment after verifying results)
        # old_path.unlink()

    # Rename the label folder if it exists
    old_label_dir = frames_dir / (old_name + ".labels")
    if not old_label_dir.exists():
        # Try extension-less folder name
        old_label_dir2 = frames_dir / (stem + ".labels")
        if old_label_dir2.exists():
            old_label_dir = old_label_dir2

    if old_label_dir.exists():
        new_label_dir = frames_dir / (new_name + ".labels")
        print(f"Renaming labels: {old_label_dir.name} → {new_label_dir.name}")
        shutil.move(str(old_label_dir), str(new_label_dir))

    new_frame_list.append(new_name)

# Write new frame_list.json
with open(json_path, "w") as f:
    json.dump(new_frame_list, f, indent=2)

print("Done. Converted all entries to .webp and updated frame_list.json.")


Converting IMG_4088.JPG → IMG_4088.webp
Renaming labels: IMG_4088.labels → IMG_4088.webp.labels
Converting IMG_4089.JPG → IMG_4089.webp
Renaming labels: IMG_4089.labels → IMG_4089.webp.labels
Converting IMG_4090.JPG → IMG_4090.webp
Renaming labels: IMG_4090.labels → IMG_4090.webp.labels
Converting IMG_4091.JPG → IMG_4091.webp
Renaming labels: IMG_4091.labels → IMG_4091.webp.labels
Converting IMG_4092.JPG → IMG_4092.webp
Renaming labels: IMG_4092.labels → IMG_4092.webp.labels
Converting IMG_4093.JPG → IMG_4093.webp
Renaming labels: IMG_4093.labels → IMG_4093.webp.labels
Converting IMG_4094.JPG → IMG_4094.webp
Renaming labels: IMG_4094.labels → IMG_4094.webp.labels
Converting IMG_4095.JPG → IMG_4095.webp
Renaming labels: IMG_4095.labels → IMG_4095.webp.labels
Converting IMG_4097.JPG → IMG_4097.webp
Renaming labels: IMG_4097.labels → IMG_4097.webp.labels
Converting IMG_4098.JPG → IMG_4098.webp
Renaming labels: IMG_4098.labels → IMG_4098.webp.labels
Converting IMG_4099.JPG → IMG_4099.webp


In [5]:
from pathlib import Path
from PIL import Image
import json
import shutil

# ====== CONFIGURE THESE ======
DATASET_ROOT = Path("/home/tzh005/ECE253Project/detecting-the-unexpected/datasets/dataset_RoadAnomaly")
TARGET_EXT   = ".webp"   # change to ".png" if you prefer
DELETE_OLD   = False     # set True after you verify everything works
# =============================

frames_dir = DATASET_ROOT / "frames"
json_path  = DATASET_ROOT / "frame_list.json"

frame_list = json.loads(json_path.read_text())
new_frame_list = []

# Candidate source extensions we'll look for
src_exts = [".png", ".jpg", ".jpeg", ".JPG", ".JPEG", ".PNG", ".webp", ".WEBP"]

for old_name in frame_list:
    # We only trust the stem; extension can be anything
    stem = Path(old_name).stem                 # e.g. "IMG_4088"
    target_name = stem + TARGET_EXT           # e.g. "IMG_4088.webp"
    target_img_path = frames_dir / target_name

    # 1) Find existing image file for this stem
    src_img_path = None
    for e in src_exts:
        cand = frames_dir / (stem + e)
        if cand.exists():
            src_img_path = cand
            break

    if src_img_path is None:
        print(f"[WARN] No image found for stem '{stem}' (skipping conversion)")
    else:
        if target_img_path.exists():
            print(f"[SKIP IMG] {target_name} already exists")
        else:
            print(f"[CONVERT IMG] {src_img_path.name} -> {target_name}")
            img = Image.open(src_img_path).convert("RGB")
            img.save(target_img_path, TARGET_EXT.lstrip("."), quality=95)

        if DELETE_OLD and src_img_path.exists() and src_img_path.suffix.lower() != TARGET_EXT.lower():
            print(f"[DELETE OLD IMG] {src_img_path.name}")
            src_img_path.unlink()

    # 2) Normalize label folder to stem.labels (NO extension before .labels)
    desired_label_dir = frames_dir / f"{stem}.labels"

    # Possible existing label directory names
    label_candidates = [
        frames_dir / f"{old_name}.labels",      # e.g. IMG_4088.JPG.labels
        frames_dir / f"{stem}.labels",          # e.g. IMG_4088.labels
        frames_dir / f"{target_name}.labels",   # e.g. IMG_4088.webp.labels (if created accidentally)
    ]

    found_label_dir = None
    for cand in label_candidates:
        if cand.exists():
            found_label_dir = cand
            break

    if found_label_dir is None:
        print(f"[WARN] No label dir found for stem '{stem}'")
    elif found_label_dir != desired_label_dir:
        print(f"[RENAME LABELS] {found_label_dir.name} -> {desired_label_dir.name}")
        if desired_label_dir.exists() and desired_label_dir != found_label_dir:
            backup = desired_label_dir.with_name(desired_label_dir.name + ".bak")
            print(f"  [BACKUP] {desired_label_dir.name} -> {backup.name}")
            shutil.move(str(desired_label_dir), str(backup))
        shutil.move(str(found_label_dir), str(desired_label_dir))

    # 3) Record the new filename (with TARGET_EXT) for frame_list.json
    new_frame_list.append(target_name)

# 4) Save updated frame_list.json
with open(json_path, "w") as f:
    json.dump(new_frame_list, f, indent=2)

print("Done.")
print("All entries in frame_list.json now use extension:", TARGET_EXT)
print("Label folders are normalized to '<stem>.labels'.")


[SKIP IMG] IMG_4088.webp already exists
[SKIP IMG] IMG_4089.webp already exists
[SKIP IMG] IMG_4090.webp already exists
[SKIP IMG] IMG_4091.webp already exists
[SKIP IMG] IMG_4092.webp already exists
[SKIP IMG] IMG_4093.webp already exists
[SKIP IMG] IMG_4094.webp already exists
[SKIP IMG] IMG_4095.webp already exists
[SKIP IMG] IMG_4097.webp already exists
[SKIP IMG] IMG_4098.webp already exists
[SKIP IMG] IMG_4099.webp already exists
[SKIP IMG] IMG_4100.webp already exists
[SKIP IMG] IMG_4101.webp already exists
[SKIP IMG] Random1205_glared.webp already exists
[SKIP IMG] Random1269_glared.webp already exists
[SKIP IMG] Random1334_glared.webp already exists
[SKIP IMG] Random133_glared.webp already exists
[SKIP IMG] Random1360_glared.webp already exists
[SKIP IMG] Random1399_glared.webp already exists
[SKIP IMG] Random1416_glared.webp already exists
[SKIP IMG] Random1498_glared.webp already exists
[SKIP IMG] Random149_glared.webp already exists
[SKIP IMG] Random154_glared.webp already 