In [7]:
import os
import glob
import random
from PIL import Image
import numpy as np
import cv2
from tqdm import tqdm
from facenet_pytorch import MTCNN
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
import torch

In [8]:
# Initialize models
from segment_anything import sam_model_registry

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Face detector shared by the face-extraction cells.
mtcnn = MTCNN(keep_all=False, device=device)

# Build the "vit_b" SAM model from the local checkpoint file, move it to the
# selected device, and wrap it in the automatic mask generator that
# process_image() calls.
sam = sam_model_registry["vit_b"](checkpoint="sam_vit_b_01ec64.pth")
sam = sam.to(device)
mask_generator = SamAutomaticMaskGenerator(sam)


In [3]:
import os
import glob
from PIL import Image
from tqdm import tqdm
import torch
from facenet_pytorch import MTCNN

# Initialize MTCNN
device = 'cuda' if torch.cuda.is_available() else 'cpu'
mtcnn = MTCNN(keep_all=False, device=device)

# Input and output paths
input_dir = 'images/skin_issues'  # root where your images are
output_dir = 'face_extracted_images'
os.makedirs(output_dir, exist_ok=True)

# Get all image paths
image_paths = glob.glob(f"{input_dir}/**/*.jpg", recursive=True)

# Process all images: detect a face in each one and save the crop under
# output_dir using the source file's basename.
# NOTE(review): output names are flattened to the basename, so two inputs with
# the same file name in different sub-folders write to the same output file.
for path in tqdm(image_paths, desc="Extracting faces"):
    try:
        img = Image.open(path).convert("RGB")
        face = mtcnn(img)

        if face is not None:
            # With its default post-processing, MTCNN returns a float tensor
            # standardized as (pixel - 127.5) / 128, i.e. roughly in [-1, 1].
            # Casting that straight to int collapses every pixel to -1/0/1,
            # so invert the standardization before converting to uint8.
            pixels = (face * 128.0 + 127.5).round().clamp(0, 255).to(torch.uint8)
            # CHW -> HWC, the layout PIL expects.
            face_img = pixels.permute(1, 2, 0).contiguous().cpu().numpy()
            face_pil = Image.fromarray(face_img)
            filename = os.path.basename(path)
            face_pil.save(os.path.join(output_dir, filename))
    except Exception as e:
        # Best effort: report the failing file and keep going.
        print(f"Error processing {path}: {e}")



Extracting faces: 100%|████████████████████████████████████████████████████████████| 1595/1595 [08:09<00:00,  3.26it/s]

In [9]:
# YOLO class IDs

def extract_faces(input_dir, output_dir):
    """Detect a face in every ``.jpg`` under ``input_dir`` and save the crop.

    Images are found recursively. Each one is run through the module-level
    ``mtcnn`` detector; when a face is returned, the crop is written below
    ``output_dir`` at the same relative path the source had below
    ``input_dir``. Images with no detected face are skipped.

    Args:
        input_dir: Root directory that is searched recursively for ``*.jpg``.
        output_dir: Root directory for the cropped faces (created if missing).

    Returns:
        None. Results are written to disk; per-image failures are printed and
        do not stop the loop.
    """
    os.makedirs(output_dir, exist_ok=True)
    all_images = glob.glob(f"{input_dir}/**/*.jpg", recursive=True)

    for img_path in tqdm(all_images, desc="Extracting faces"):
        try:
            img = Image.open(img_path).convert('RGB')
            face = mtcnn(img)
            if face is None:
                continue
            # CHW float tensor -> HWC numpy array.
            np_img = face.permute(1, 2, 0).cpu().numpy()
            # MTCNN's default post-processing standardizes pixels as
            # (pixel - 127.5) / 128; apply the exact inverse, then round and
            # clip so the uint8 cast cannot truncate or wrap around.
            np_img = np.clip(np.rint(np_img * 128.0 + 127.5), 0, 255).astype(np.uint8)
            rel_path = os.path.relpath(img_path, input_dir)
            save_path = os.path.join(output_dir, rel_path)
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            Image.fromarray(np_img).save(save_path)
        except Exception as exc:
            # Catch Exception (not a bare except) so Ctrl-C still interrupts
            # the loop, and report the file instead of skipping it silently.
            print(f"Error processing {img_path}: {exc}")

# Step 1: Extract faces to a separate folder
if __name__ == '__main__':
    extract_faces('images/skin_issues', 'faces_dataset')

 



Extracting faces: 100%|████████████████████████████████████████████████████████████| 1595/1595 [07:54<00:00,  3.36it/s]

In [None]:
def process_image(image_path):
    """Segment one image and collect YOLO annotation entries for it.

    The image is loaded as RGB and passed to the module-level
    ``mask_generator``. Every returned mask is tested with ``is_wrinkle``,
    ``is_redness`` and ``is_dryness`` (in that order); for each positive test
    the mask is converted with ``mask_to_yolo`` and the result is kept when it
    is truthy. If ``get_acne_label`` returns a label for the path, one extra
    entry with the fixed box ``0.5 0.5 1.0 1.0`` is appended.

    Args:
        image_path: Path of the image file to process.

    Returns:
        ``(np_img, yolo_annotations)``: the RGB image array and the list of
        annotation entries, or ``(None, None)`` when processing raised.
    """
    try:
        img = Image.open(image_path).convert('RGB')
        np_img = np.array(img)
        masks = mask_generator.generate(np_img)
        h, w = np_img.shape[:2]

        # (predicate, CLASS_MAP key) pairs, evaluated in this order per mask.
        checks = (
            (is_wrinkle, 'wrinkles'),
            (is_redness, 'redness'),
            (is_dryness, 'dryness'),
        )

        yolo_annotations = []
        for m in masks:
            mask = m['segmentation']
            for predicate, class_key in checks:
                if not predicate(np_img, mask):
                    continue
                yolo = mask_to_yolo(mask, CLASS_MAP[class_key], w, h)
                if yolo:
                    yolo_annotations.append(yolo)

        acne_label = get_acne_label(image_path)
        if acne_label is not None:
            yolo_annotations.append(f"{acne_label} 0.5 0.5 1.0 1.0")

        return np_img, yolo_annotations
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C can still stop a long
        # run, and report the failure instead of hiding it.
        print(f"Error processing {image_path}: {exc}")
        return None, None

def prepare_dataset(image_root='faces_dataset', output_root='yolo_dataset', split_ratio=0.8, seed=None):
    """Build a YOLO-style train/val dataset from the images under ``image_root``.

    All ``*.jpg`` files below ``image_root`` are shuffled and split; each image
    is run through ``process_image`` and, when it yields annotations, the image
    and a ``.txt`` label file are written to ``<output_root>/images/<mode>``
    and ``<output_root>/labels/<mode>``. A ``dataset.yaml`` describing the
    dataset is written to ``output_root``.

    Args:
        image_root: Directory searched recursively for ``*.jpg`` files.
        output_root: Directory that receives images, labels and dataset.yaml.
        split_ratio: Fraction of the images assigned to the train split.
        seed: Optional seed for a reproducible shuffle. ``None`` (default)
            uses the global ``random`` state, as before.

    Returns:
        None. All results are written to disk.
    """
    # Sort first: glob order depends on the filesystem, so a seeded shuffle is
    # only reproducible when it starts from a stable ordering.
    all_images = sorted(glob.glob(f"{image_root}/**/*.jpg", recursive=True))
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(all_images)
    split_idx = int(len(all_images) * split_ratio)
    train_imgs, val_imgs = all_images[:split_idx], all_images[split_idx:]

    for mode, images in zip(['train', 'val'], [train_imgs, val_imgs]):
        img_dir = os.path.join(output_root, 'images', mode)
        lbl_dir = os.path.join(output_root, 'labels', mode)
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(lbl_dir, exist_ok=True)

        if not images:
            # Nothing to process for this split; the directories still exist.
            continue

        for img_path in tqdm(images, desc=f"Processing {mode} set"):
            cropped_img, annotations = process_image(img_path)
            if cropped_img is None or not annotations:
                continue

            # Name outputs after the path relative to image_root rather than
            # the bare basename, so same-named files from different
            # sub-folders no longer overwrite each other. Files directly in
            # image_root keep their original stem.
            rel_stem = os.path.splitext(os.path.relpath(img_path, image_root))[0]
            base = rel_stem.replace(os.sep, "__")
            out_img_path = os.path.join(img_dir, base + ".jpg")
            out_lbl_path = os.path.join(lbl_dir, base + ".txt")

            Image.fromarray(cropped_img).save(out_img_path)
            with open(out_lbl_path, 'w') as f:
                f.write("\n".join(annotations))

    # Write dataset.yaml. The dataset root must follow output_root; it used to
    # be hard-coded to ./yolo_dataset regardless of the argument. The default
    # output_root still produces "./yolo_dataset".
    dataset_path = os.path.normpath(output_root).replace(os.sep, "/")
    if not os.path.isabs(output_root):
        dataset_path = f"./{dataset_path}"
    yaml_lines = [
        "",
        f"path: {dataset_path}",
        "train: images/train",
        "val: images/val",
        "names:",
        "  0: wrinkles",
        "  1: redness",
        "  2: dryness",
        "  3: acne_Level_0",
        "  4: acne_Level_1",
        "  5: acne_Level_2",
        "",
    ]
    with open(os.path.join(output_root, 'dataset.yaml'), 'w') as f:
        f.write("\n".join(yaml_lines))


# Step 2: Label the face images
if __name__ == '__main__':
    prepare_dataset(image_root='faces_dataset')


Processing train set:   8%|███▊                                               | 80/1055 [10:08:54<98:49:12, 364.87s/it]

In [None]:
def process_image(image_path):
    """Segment one image and collect YOLO annotation entries for it.

    The image is loaded as RGB and passed to the module-level
    ``mask_generator``. Every returned mask is tested with ``is_wrinkle``,
    ``is_redness`` and ``is_dryness`` (in that order); for each positive test
    the mask is converted with ``mask_to_yolo`` and the result is kept when it
    is truthy. If ``get_acne_label`` returns a label for the path, one extra
    entry with the fixed box ``0.5 0.5 1.0 1.0`` is appended.

    Args:
        image_path: Path of the image file to process.

    Returns:
        ``(np_img, yolo_annotations)``: the RGB image array and the list of
        annotation entries, or ``(None, None)`` when processing raised.
    """
    try:
        img = Image.open(image_path).convert('RGB')
        np_img = np.array(img)
        masks = mask_generator.generate(np_img)
        h, w = np_img.shape[:2]

        # (predicate, CLASS_MAP key) pairs, evaluated in this order per mask.
        checks = (
            (is_wrinkle, 'wrinkles'),
            (is_redness, 'redness'),
            (is_dryness, 'dryness'),
        )

        yolo_annotations = []
        for m in masks:
            mask = m['segmentation']
            for predicate, class_key in checks:
                if not predicate(np_img, mask):
                    continue
                yolo = mask_to_yolo(mask, CLASS_MAP[class_key], w, h)
                if yolo:
                    yolo_annotations.append(yolo)

        acne_label = get_acne_label(image_path)
        if acne_label is not None:
            yolo_annotations.append(f"{acne_label} 0.5 0.5 1.0 1.0")

        return np_img, yolo_annotations
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C can still stop a long
        # run, and report the failure instead of hiding it.
        print(f"Error processing {image_path}: {exc}")
        return None, None

def prepare_dataset(image_root='faces_dataset', output_root='yolo_dataset', split_ratio=0.8, seed=None):
    """Build a YOLO-style train/val dataset from the images under ``image_root``.

    All ``*.jpg`` files below ``image_root`` are shuffled and split; each image
    is run through ``process_image`` and, when it yields annotations, the image
    and a ``.txt`` label file are written to ``<output_root>/images/<mode>``
    and ``<output_root>/labels/<mode>``. A ``dataset.yaml`` describing the
    dataset is written to ``output_root``.

    Args:
        image_root: Directory searched recursively for ``*.jpg`` files.
        output_root: Directory that receives images, labels and dataset.yaml.
        split_ratio: Fraction of the images assigned to the train split.
        seed: Optional seed for a reproducible shuffle. ``None`` (default)
            uses the global ``random`` state, as before.

    Returns:
        None. All results are written to disk.
    """
    # Sort first: glob order depends on the filesystem, so a seeded shuffle is
    # only reproducible when it starts from a stable ordering.
    all_images = sorted(glob.glob(f"{image_root}/**/*.jpg", recursive=True))
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(all_images)
    split_idx = int(len(all_images) * split_ratio)
    train_imgs, val_imgs = all_images[:split_idx], all_images[split_idx:]

    for mode, images in zip(['train', 'val'], [train_imgs, val_imgs]):
        img_dir = os.path.join(output_root, 'images', mode)
        lbl_dir = os.path.join(output_root, 'labels', mode)
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(lbl_dir, exist_ok=True)

        if not images:
            # Nothing to process for this split; the directories still exist.
            continue

        for img_path in tqdm(images, desc=f"Processing {mode} set"):
            cropped_img, annotations = process_image(img_path)
            if cropped_img is None or not annotations:
                continue

            # Name outputs after the path relative to image_root rather than
            # the bare basename, so same-named files from different
            # sub-folders no longer overwrite each other. Files directly in
            # image_root keep their original stem.
            rel_stem = os.path.splitext(os.path.relpath(img_path, image_root))[0]
            base = rel_stem.replace(os.sep, "__")
            out_img_path = os.path.join(img_dir, base + ".jpg")
            out_lbl_path = os.path.join(lbl_dir, base + ".txt")

            Image.fromarray(cropped_img).save(out_img_path)
            with open(out_lbl_path, 'w') as f:
                f.write("\n".join(annotations))

    # Write dataset.yaml. The dataset root must follow output_root; it used to
    # be hard-coded to ./yolo_dataset regardless of the argument. The default
    # output_root still produces "./yolo_dataset".
    dataset_path = os.path.normpath(output_root).replace(os.sep, "/")
    if not os.path.isabs(output_root):
        dataset_path = f"./{dataset_path}"
    yaml_lines = [
        "",
        f"path: {dataset_path}",
        "train: images/train",
        "val: images/val",
        "names:",
        "  0: wrinkles",
        "  1: redness",
        "  2: dryness",
        "  3: acne_Level_0",
        "  4: acne_Level_1",
        "  5: acne_Level_2",
        "",
    ]
    with open(os.path.join(output_root, 'dataset.yaml'), 'w') as f:
        f.write("\n".join(yaml_lines))


# Step 2: Label the face images
if __name__ == '__main__':
    prepare_dataset(image_root='faces_dataset')


In [None]:
from multiprocessing import Pool, cpu_count

def save_processed(img_path_and_data, img_dir, lbl_dir, image_root=None):
    """Write one processed image and its label file to disk.

    Args:
        img_path_and_data: ``(img_path, (image_array, annotations))`` pair, as
            produced by pairing an input path with ``process_image``'s result.
        img_dir: Directory that receives the ``.jpg`` image.
        lbl_dir: Directory that receives the ``.txt`` label file.
        image_root: Optional dataset root. When given, the output stem is the
            path of ``img_path`` relative to this root (separators replaced by
            ``__``) so same-named files from different sub-folders do not
            overwrite each other. When ``None`` the bare basename is used.

    Returns:
        None. Nothing is written when the image is ``None`` or there are no
        annotations.
    """
    img_path, result = img_path_and_data
    cropped_img, annotations = result
    if cropped_img is None or not annotations:
        return

    if image_root is None:
        base = os.path.splitext(os.path.basename(img_path))[0]
    else:
        rel_stem = os.path.splitext(os.path.relpath(img_path, image_root))[0]
        base = rel_stem.replace(os.sep, "__")
    out_img_path = os.path.join(img_dir, base + ".jpg")
    out_lbl_path = os.path.join(lbl_dir, base + ".txt")

    Image.fromarray(cropped_img).save(out_img_path)
    with open(out_lbl_path, 'w') as f:
        f.write("\n".join(annotations))

def prepare_dataset(image_root='faces_dataset', output_root='yolo_dataset', split_ratio=0.8,
                    seed=None, num_workers=None):
    """Build a YOLO-style train/val dataset using a pool of worker processes.

    All ``*.jpg`` files below ``image_root`` are shuffled and split; each split
    is mapped over ``process_image`` in a ``multiprocessing.Pool`` and every
    result is saved with ``save_processed`` as soon as it arrives. A
    ``dataset.yaml`` describing the dataset is written to ``output_root``.

    NOTE(review): the workers call ``process_image``, which reads the
    module-level ``mask_generator``. Presumably each worker needs its own copy
    of that model, and functions defined in a notebook cell may not be
    importable by ``spawn``-started workers; confirm this works in the target
    environment before relying on the parallel path.

    Args:
        image_root: Directory searched recursively for ``*.jpg`` files.
        output_root: Directory that receives images, labels and dataset.yaml.
        split_ratio: Fraction of the images assigned to the train split.
        seed: Optional seed for a reproducible shuffle. ``None`` (default)
            uses the global ``random`` state, as before.
        num_workers: Number of worker processes. ``None`` (default) uses
            ``cpu_count()``, as before.

    Returns:
        None. All results are written to disk.
    """
    # Sort first: glob order depends on the filesystem, so a seeded shuffle is
    # only reproducible when it starts from a stable ordering.
    all_images = sorted(glob.glob(f"{image_root}/**/*.jpg", recursive=True))
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(all_images)
    split_idx = int(len(all_images) * split_ratio)
    train_imgs, val_imgs = all_images[:split_idx], all_images[split_idx:]

    n_procs = num_workers or cpu_count()

    for mode, images in zip(['train', 'val'], [train_imgs, val_imgs]):
        img_dir = os.path.join(output_root, 'images', mode)
        lbl_dir = os.path.join(output_root, 'labels', mode)
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(lbl_dir, exist_ok=True)

        print(f"Processing {mode} set with {len(images)} images using {n_procs} processes...")
        if not images:
            # Do not start a worker pool for an empty split.
            continue

        with Pool(processes=n_procs) as pool:
            # imap yields results in input order, so they can be zipped with
            # the paths and saved one at a time instead of first collecting
            # every image array in a list.
            results = tqdm(pool.imap(process_image, images), total=len(images), desc=f"Processing {mode}")
            for img_path, result in zip(images, results):
                save_processed((img_path, result), img_dir, lbl_dir, image_root=image_root)

    # Write dataset.yaml. The dataset root must follow output_root; it used to
    # be hard-coded to ./yolo_dataset regardless of the argument. The default
    # output_root still produces "./yolo_dataset".
    dataset_path = os.path.normpath(output_root).replace(os.sep, "/")
    if not os.path.isabs(output_root):
        dataset_path = f"./{dataset_path}"
    yaml_lines = [
        "",
        f"path: {dataset_path}",
        "train: images/train",
        "val: images/val",
        "names:",
        "  0: wrinkles",
        "  1: redness",
        "  2: dryness",
        "  3: acne_Level_0",
        "  4: acne_Level_1",
        "  5: acne_Level_2",
        "",
    ]
    with open(os.path.join(output_root, 'dataset.yaml'), 'w') as f:
        f.write("\n".join(yaml_lines))

if __name__ == '__main__':
    import multiprocessing
    # Important for Windows or Colab!
    # force=True: without it, running this cell a second time in the same
    # kernel raises RuntimeError because the start method is already set.
    multiprocessing.set_start_method('spawn', force=True)
    prepare_dataset(image_root='faces_dataset')
