In [1]:
import json, os

# --- Step 1: collapse duplicate category names in the COCO annotation file ---
# Reads <base_dir>/_annotations.coco.json, keeps the first category seen for
# each name, remaps annotations that pointed at a later duplicate onto that
# first id, and writes the result to <base_dir>/_annotations_fixed.coco.json.
# The original annotation file is never modified, so this cell can be re-run.
base_dir = "SkinProblem/train"
coco_json = os.path.join(base_dir, "_annotations.coco.json")

if not os.path.exists(coco_json):
    raise FileNotFoundError(f"‚ùå COCO file not found at {coco_json}")

# JSON text is UTF-8; be explicit so the platform's locale encoding
# (e.g. cp1252 on Windows) cannot mis-decode non-ASCII names or file names.
with open(coco_json, "r", encoding="utf-8") as f:
    coco = json.load(f)

# check original categories
print("\nüìå Original categories:")
for cat in coco["categories"]:
    print(f"- id {cat['id']}: {cat['name']}")

# fix duplicates
seen = {}            # category name -> id of the first category with that name
duplicate_map = {}   # id of a duplicate category -> id it is replaced by
new_categories = []  # categories that survive, in their original order
for cat in coco["categories"]:
    if cat["name"] in seen:
        print(f"‚ö†Ô∏è duplicate {cat['name']} (id {cat['id']}) ‚Üí remap to {seen[cat['name']]}")
        duplicate_map[cat["id"]] = seen[cat["name"]]
    else:
        seen[cat["name"]] = cat["id"]
        new_categories.append(cat)

# Point every annotation that used a dropped id at the surviving id.
for ann in coco["annotations"]:
    if ann["category_id"] in duplicate_map:
        ann["category_id"] = duplicate_map[ann["category_id"]]

# Surviving ids are NOT renumbered here (they may be sparse, e.g. 0, 2, 3).
coco["categories"] = new_categories

fixed_path = os.path.join(base_dir, "_annotations_fixed.coco.json")
with open(fixed_path, "w", encoding="utf-8") as f:
    json.dump(coco, f, indent=2)

print(f"\n‚úÖ Saved fixed COCO annotations ‚Üí {fixed_path}")
print("\nüìå Cleaned categories:")
for cat in coco["categories"]:
    print(f"- id {cat['id']}: {cat['name']}")



üìå Original categories:
- id 0: acne
- id 1: acne
- id 2: dark circle
- id 3: wrinkle
‚ö†Ô∏è duplicate acne (id 1) ‚Üí remap to 0

‚úÖ Saved fixed COCO annotations ‚Üí SkinProblem/train\_annotations_fixed.coco.json

üìå Cleaned categories:
- id 0: acne
- id 2: dark circle
- id 3: wrinkle


In [2]:
from pathlib import Path
from tqdm import tqdm

# --- Step 2: convert the de-duplicated COCO annotations to YOLOv8-seg labels ---
# Writes one <image stem>.txt per image into <base_dir>/labels. Each line is
# "<class index> x1 y1 x2 y2 ..." with coordinates divided by the image
# width/height. Images without annotations still get an (empty) label file.
# NOTE: relies on `os`, `json` and `base_dir` from the first cell.
coco_json_path = os.path.join(base_dir, "_annotations_fixed.coco.json")
lbl_dir = os.path.join(base_dir, "labels")
os.makedirs(lbl_dir, exist_ok=True)

with open(coco_json_path, "r", encoding="utf-8") as f:
    coco = json.load(f)

# COCO category ids may be sparse after de-duplication; map them onto the
# contiguous 0..N-1 indices used in the label files (order of the list).
cat2id = {cat["id"]: i for i, cat in enumerate(coco["categories"])}

# Group annotations by the image they belong to.
img2anns = {}
for ann in coco["annotations"]:
    img2anns.setdefault(ann["image_id"], []).append(ann)


def _bbox_polygon(ann):
    """Return the annotation's [x, y, w, h] bbox as a 4-corner flat polygon."""
    x, y, bw, bh = ann["bbox"]
    return [x, y, x + bw, y, x + bw, y + bh, x, y + bh]


def _candidate_polygons(ann):
    """Return the flat [x1, y1, x2, y2, ...] polygons stored on an annotation.

    Anything that is not a non-empty list (missing key, empty list, or a
    dict such as an RLE mask) yields no polygons, so the caller falls back
    to the bounding box instead of crashing on ``segmentation[0]``.
    """
    seg = ann.get("segmentation")
    if not isinstance(seg, list) or not seg:
        return []
    if isinstance(seg[0], (list, tuple)):
        return list(seg)
    # A flat list of numbers is treated as a single polygon.
    return [seg]


def _yolo_seg_line(cls_id, points, w, h):
    """Format one polygon as a label line, or return None if it is degenerate.

    A polygon needs at least 3 (x, y) pairs and an even number of values.
    Normalised coordinates are clamped to [0, 1] so that points lying
    slightly outside the image do not produce out-of-range labels.
    """
    if len(points) < 6 or len(points) % 2 != 0:
        return None
    coords = []
    for i, value in enumerate(points):
        extent = w if i % 2 == 0 else h  # even index -> x, odd index -> y
        coords.append(str(min(max(value / extent, 0.0), 1.0)))
    return f"{cls_id} " + " ".join(coords) + "\n"


print("\nConverting COCO ‚Üí YOLOv8-seg labels...")
for img in tqdm(coco["images"]):
    img_id = img["id"]
    anns = img2anns.get(img_id, [])
    h, w = img["height"], img["width"]  # same for every annotation of the image
    label_path = Path(lbl_dir) / (Path(img["file_name"]).stem + ".txt")

    with open(label_path, "w") as f:
        for ann in anns:
            cls_id = cat2id[ann["category_id"]]

            # Every polygon part is written as its own line, so multi-part
            # annotations are no longer truncated to their first part.
            lines = [
                line
                for line in (
                    _yolo_seg_line(cls_id, poly, w, h)
                    for poly in _candidate_polygons(ann)
                )
                if line is not None
            ]
            if not lines:
                # No usable polygon: fall back to the bounding-box rectangle.
                fallback = _yolo_seg_line(cls_id, _bbox_polygon(ann), w, h)
                lines = [fallback] if fallback is not None else []

            f.writelines(lines)

print("\n‚úÖ Conversion complete! YOLOv8-seg labels created in train/labels/")



Converting COCO ‚Üí YOLOv8-seg labels...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6513/6513 [00:03<00:00, 1704.18it/s]


‚úÖ Conversion complete! YOLOv8-seg labels created in train/labels/





In [3]:
import glob

# --- Step 3: count polygons per class in the generated label files ---
# NOTE: relies on `coco`, `lbl_dir` and `cat2id` from the conversion cell and
# on `os` from the first cell.
class_counts = {i: 0 for i in range(len(coco["categories"]))}
label_files = glob.glob(os.path.join(lbl_dir, "*.txt"))

for lf in label_files:
    with open(lf) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line: nothing to count (indexing [0] would raise).
                continue
            cls = int(fields[0])  # first token of a label line is the class index
            class_counts[cls] += 1

print("\nüìä YOLO label counts after conversion:")
for cat in coco["categories"]:
    print(f"- {cat['name']}: {class_counts[cat2id[cat['id']]]} polygons")



üìä YOLO label counts after conversion:
- acne: 16497 polygons
- dark circle: 3999 polygons
- wrinkle: 17885 polygons


In [4]:
import os, glob
from pathlib import Path

# --- Step 4: sanity-check images and labels before the images are moved ---
# Paths
base_dir = "SkinProblem/train"
# Images are still directly in base_dir at this point; a later cell moves them
# into base_dir/images, after which this check would find no images here.
img_dir = base_dir
lbl_dir = os.path.join(base_dir, "labels")

# Load categories from fixed COCO file
import json
coco_json_path = os.path.join(base_dir, "_annotations_fixed.coco.json")
with open(coco_json_path, "r", encoding="utf-8") as f:
    coco = json.load(f)

class_names = [cat["name"] for cat in coco["categories"]]
num_classes = len(class_names)

print(f"\nüìÇ Checking dataset consistency...")
print(f"Classes ({num_classes}): {class_names}")

# Collect files. Use the same extensions the image-moving cell handles, so
# .jpeg/.png images are checked as well instead of being silently ignored.
image_files = sorted(
    path
    for pattern in ("*.jpg", "*.jpeg", "*.png")
    for path in glob.glob(os.path.join(img_dir, pattern))
)
label_files = sorted(glob.glob(os.path.join(lbl_dir, "*.txt")))

print(f"Found {len(image_files)} images and {len(label_files)} label files")

# Check if every image has a label file (matched by file stem)
missing_labels = []
for img_file in image_files:
    lbl_file = os.path.join(lbl_dir, Path(img_file).stem + ".txt")
    if not os.path.exists(lbl_file):
        missing_labels.append(img_file)

if missing_labels:
    print(f"‚ö†Ô∏è {len(missing_labels)} images have no labels. Example: {missing_labels[:5]}")
else:
    print("‚úÖ All images have corresponding label files.")

# Check label validity + counts
class_counts = {i: 0 for i in range(num_classes)}
bad_labels = []  # (label file, offending line) pairs

for lf in label_files:
    with open(lf) as f:
        for line in f:
            parts = line.strip().split()
            if not parts:
                continue
            try:
                cls = int(parts[0])
            except ValueError:
                # A non-integer class token is reported, not allowed to crash.
                bad_labels.append((lf, line.strip()))
                continue
            if cls < 0 or cls >= num_classes:
                bad_labels.append((lf, line.strip()))
            else:
                class_counts[cls] += 1

print("\nüìä Label counts per class:")
for i, name in enumerate(class_names):
    print(f"- {name}: {class_counts[i]} polygons")

if bad_labels:
    # Leading "\n" restored: the original had a bare backslash here, which is
    # an invalid escape sequence and printed a literal backslash.
    print(f"\n‚ö†Ô∏è Found {len(bad_labels)} invalid class IDs! Example: {bad_labels[:5]}")
else:
    print("\n‚úÖ All labels contain valid class IDs.")



üìÇ Checking dataset consistency...
Classes (3): ['acne', 'dark circle', 'wrinkle']
Found 6513 images and 6513 label files
‚úÖ All images have corresponding label files.

üìä Label counts per class:
- acne: 16497 polygons
- dark circle: 3999 polygons
- wrinkle: 17885 polygons

‚úÖ All labels contain valid class IDs.


In [5]:
import os, glob, shutil

# --- Step 5: gather the training images into <base_dir>/images ---
# Destination folder is created on demand; source images sit directly in base_dir.
base_dir = "SkinProblem/train"
img_dir = os.path.join(base_dir, "images")
os.makedirs(img_dir, exist_ok=True)

# Glob patterns for the image types that get relocated.
extensions = ["*.jpg", "*.jpeg", "*.png"]
moved_count = 0

for pattern in extensions:
    matches = glob.glob(os.path.join(base_dir, pattern))
    for src in matches:
        target = os.path.join(img_dir, os.path.basename(src))
        if os.path.exists(target):
            # A file with this name is already in images/: leave both alone.
            continue
        shutil.move(src, target)
        moved_count += 1

print(f"‚úÖ Moved {moved_count} images into 'train/images/' folder.")

‚úÖ Moved 6513 images into 'train/images/' folder.


In [6]:
import os, glob
from collections import Counter

# --- Step 6: final consistency check of images/ against labels/ ---
# Paths
base_dir = "SkinProblem/train"
img_dir = os.path.join(base_dir, "images")
lbl_dir = os.path.join(base_dir, "labels")

# Define classes (must match your training config)
classes = ["acne", "dark circle", "wrinkle"]

print("üìÇ Checking dataset consistency...")

# Collect file stems. Only real image files are considered, so stray files in
# images/ (thumbnail caches, sub-folders, ...) are not reported as unlabeled.
image_exts = {".jpg", ".jpeg", ".png"}
images = {
    os.path.splitext(os.path.basename(f))[0]
    for f in glob.glob(os.path.join(img_dir, "*"))
    if os.path.splitext(f)[1].lower() in image_exts
}
labels = {os.path.splitext(os.path.basename(f))[0] for f in glob.glob(os.path.join(lbl_dir, "*.txt"))}

# 1) Check image-label matching (set difference on file stems)
missing_labels = images - labels
missing_images = labels - images

print(f"Found {len(images)} images and {len(labels)} label files")
if missing_labels:
    print(f"‚ö†Ô∏è {len(missing_labels)} images without labels (e.g., {list(missing_labels)[:5]})")
if missing_images:
    print(f"‚ö†Ô∏è {len(missing_images)} labels without images (e.g., {list(missing_images)[:5]})")
if not missing_labels and not missing_images:
    print("‚úÖ All images have corresponding label files.")

# 2) Validate labels
class_counts = Counter()
bad_labels = []  # label files that contain no annotation line at all
for lbl_file in glob.glob(os.path.join(lbl_dir, "*.txt")):
    has_annotation = False
    with open(lbl_file) as f:
        for line in f:
            parts = line.strip().split()
            if not parts:
                # Blank line inside a file: ignore it.
                continue
            has_annotation = True
            cls = int(parts[0])
            if cls < 0 or cls >= len(classes):
                print(f"‚ùå Invalid class ID {cls} in {lbl_file}")
            else:
                class_counts[cls] += 1
    # A zero-length file yields no lines, so emptiness has to be decided per
    # file after reading it; counting blank lines would miss such files.
    if not has_annotation:
        bad_labels.append(lbl_file)

if bad_labels:
    # Informational: these label files carry no annotations for their image.
    print(f"‚ö†Ô∏è {len(bad_labels)} empty label files found.")

# 3) Print summary
print("\nüìä Label counts per class:")
for i, cname in enumerate(classes):
    print(f"- {cname}: {class_counts[i]} polygons")

print("\n‚úÖ Label consistency check complete.")


üìÇ Checking dataset consistency...
Found 6513 images and 6513 label files
‚úÖ All images have corresponding label files.

üìä Label counts per class:
- acne: 16497 polygons
- dark circle: 3999 polygons
- wrinkle: 17885 polygons

‚úÖ Label consistency check complete.
