In [1]:
!pip install ultralytics opencv-python numpy pandas




In [6]:
import os
import cv2
import numpy as np
import pandas as pd
from ultralytics import YOLO
from tqdm import tqdm
import random

# ---- Config ----
IMG_DIR = "HAM10000_images_final"
CROP_DIR = "crops"
os.makedirs(CROP_DIR, exist_ok=True)
OUTPUT_CSV = "local_labels.csv"

# lightweight YOLO model (you can use yolov8n.pt for speed)
model = YOLO("yolov8n.pt")  



In [3]:
# simple mappings
colors = ["reddish", "brownish", "light", "dark", "pale", "pinkish"]
textures = ["smooth", "rough", "irregular"]
borders = ["well-defined", "blurred", "uneven"]
positions = ["upper left", "upper center", "upper right",
              "center left", "center", "center right",
              "lower left", "lower center", "lower right"]

# templates for captions
templates = [
    "A {color} lesion with {texture} texture located at the {position}.",
    "An area of {color} skin showing {texture} pattern at the {position}.",
    "A {color} patch with {border} borders seen at the {position}.",
    "An {texture} lesion with {color} pigmentation at the {position}.",
    "A {color} lesion showing {border} edges at the {position}."
]

records = []

# ---- Helper Functions ----
def get_position(bbox, img_shape):
    x1, y1, x2, y2 = bbox
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2
    h, w = img_shape[:2]

    row = "upper" if cy < h/3 else "center" if cy < 2*h/3 else "lower"
    col = "left" if cx < w/3 else "center" if cx < 2*w/3 else "right"
    return f"{row} {col}"

def get_color_name(crop):
    mean_bgr = np.mean(crop, axis=(0,1))
    avg = np.mean(mean_bgr)
    if avg < 60: return "dark"
    elif avg < 120: return "brownish"
    elif avg < 180: return "light"
    else: return "pale"


In [4]:

# ---- Run Inference ----
for img_file in tqdm(os.listdir(IMG_DIR)):
    if not img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue

    img_path = os.path.join(IMG_DIR, img_file)
    img = cv2.imread(img_path)
    if img is None:
        continue

    results = model(img, verbose=False)[0]

    for i, box in enumerate(results.boxes.xyxy.cpu().numpy()):
        x1, y1, x2, y2 = map(int, box)
        conf = float(results.boxes.conf[i])
        cls = int(results.boxes.cls[i]) if results.boxes.cls is not None else -1

        # crop and save
        crop = img[y1:y2, x1:x2]
        crop_path = os.path.join(CROP_DIR, f"{os.path.splitext(img_file)[0]}_{i}.jpg")
        cv2.imwrite(crop_path, crop)

        # generate label
        label = random.choice(templates).format(
            color=get_color_name(crop),
            texture=random.choice(textures),
            border=random.choice(borders),
            position=get_position((x1,y1,x2,y2), img.shape)
        )

        records.append({
            "image_id": img_file,
            "lesion_id": i,
            "bbox": [x1, y1, x2, y2],
            "conf": round(conf, 3),
            "class": cls,
            "crop_path": crop_path,
            "local_label": label
        })


100%|████████████████████████████████████████████████████████████████████████████████| 472/472 [02:10<00:00,  3.62it/s]


In [5]:

# save results
pd.DataFrame(records).to_csv(OUTPUT_CSV, index=False)
print(f"\n✅ Done! Saved {len(records)} lesion labels to {OUTPUT_CSV}")



✅ Done! Saved 73 lesion labels to local_labels.csv


In [2]:
import os
import random
import torch
import pandas as pd
from ultralytics import YOLO
from PIL import Image




# Load YOLO model (lightweight)
# even COCO-pretrained works fine just to get bounding boxes
model = YOLO("yolov8n.pt")

# ==========================
# TEMPLATE SETTINGS (you’ll fill these later)
# ==========================
# Leave empty for now — you’ll manually inject your list later
local_label_templates = [
    "An irregular pigmented lesion located at the {position} of the image.",
    "A small {color} lesion appearing near the {position}.",
    "A {texture} and {color} spot visible at the {position}.",
    "Lesion with {texture} texture located centrally.",
    "A {size} lesion found on the {position} side of the image.",
    "The {color} lesion with {texture} surface lies toward the {position}.",
    "A {color} patch showing {texture} edges in the {position}.",
    "Lesion area of {size} and {texture} type, seen near {position}.",
    "A slightly {color} lesion detected at the {position}.",
    "A {texture}, {color} mark can be seen toward the {position}.",
    "Pigmented {texture} lesion visible at the {position}.",
    "A small {color} circular lesion lies at the {position}.",
    "A {texture} lesion occupying the {position} region.",
    "Visible {color} lesion of {size} size at the {position}.",
    "A lesion of {color} pigmentation located near the {position}.",
    "This image contains a {color} lesion in the {position} with {texture} texture.",
    "The lesion is {texture} and {color}, centered toward the {position}.",
    "Observed {color} and {texture} lesion occupying {position} of the frame.",
    "Localized {color} lesion with irregular boundary on the {position}.",
    "An evident {color} lesion placed around the {position}.",
    "A {texture} mark visible at the {position} area.",
    "Lesion of {size} size appears in the {position}.",
    "Irregular {color} region situated at the {position}.",
    "A {texture} lesion with {color} pigmentation at the {position}.",
    "Pigmentation in {color} tone concentrated at the {position}.",
    "Single lesion detected near the {position}, showing {texture} pattern."
]


# Supporting lists for random choice
colors = ["reddish", "brown", "light brown", "dark", "blackish", "pinkish", "tan", "flesh-colored"]
textures = ["smooth", "rough", "irregular", "scaly", "flat", "elevated", "granular"]
positions = ["top-left", "top-right", "bottom-left", "bottom-right", "center"]
sizes = ["small", "medium", "large"]

# ==========================
# UTILS
# ==========================
def fallback_crop(img: Image.Image):
    """Return central 60% region if YOLO fails."""
    w, h = img.size
    left, top = int(0.2*w), int(0.2*h)
    right, bottom = int(0.8*w), int(0.8*h)
    return (left, top, right, bottom)



In [5]:
import cv2
import numpy as np
import pandas as pd
from pathlib import Path

# === CONFIG ===
IMAGES_DIR = Path("HAM10000_images_final/")             # folder containing original images
OUTPUT_DIR = Path("lesion_crops/")       # folder to save cropped lesion images
OUTPUT_DIR.mkdir(exist_ok=True)
OUTPUT_CSV = "local_labels_true.csv"

# === HELPER FUNCTIONS ===
def get_color_name(rgb):
    """Roughly classify lesion color based on mean RGB"""
    r, g, b = rgb
    if np.mean(rgb) < 60:
        return "dark"
    if r > g and r > b:
        return "reddish"
    if g > r and g > b:
        return "greenish"
    if b > r and b > g:
        return "bluish"
    if abs(r - g) < 20 and abs(g - b) < 20:
        return "grayish"
    if r > 120 and g > 100 and b < 80:
        return "brownish"
    return "light-colored"

def get_position_label(cx, cy, shape):
    """Return position label (top-left, center, bottom-right, etc.)"""
    h, w = shape[:2]
    x_zone = "left" if cx < w/3 else "right" if cx > 2*w/3 else "center"
    y_zone = "top" if cy < h/3 else "bottom" if cy > 2*h/3 else "middle"
    return f"{y_zone}-{x_zone}"

def find_lesion_bbox(image):
    """Detect main lesion using simple threshold + contour analysis"""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    # Adaptive threshold or Otsu for flexible lighting
    _, mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        return None

    lesion_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(lesion_contour)
    return x, y, w, h


In [6]:
# === MAIN PROCESS ===
rows = []
image_paths = list(IMAGES_DIR.glob("*.jpg")) + list(IMAGES_DIR.glob("*.png"))

for img_path in image_paths:
    try:
        img = cv2.imread(str(img_path))
        if img is None:
            print(f"⚠️ Skipped unreadable file: {img_path.name}")
            continue

        result = find_lesion_bbox(img)
        if result is None:
            print(f"❌ No lesion detected for: {img_path.name}")
            continue

        x, y, w, h = result

        # Ensure reasonable crop size
        if w < 10 or h < 10:
            print(f"⚠️ Too small lesion region in {img_path.name}")
            continue

        lesion_crop = img[y:y+h, x:x+w]
        crop_path = OUTPUT_DIR / f"{img_path.stem}_lesion.jpg"
        cv2.imwrite(str(crop_path), lesion_crop)

        # Mean color & position label
        mean_color = lesion_crop.mean(axis=(0, 1)).astype(int)
        color_name = get_color_name(mean_color)
        cx, cy = x + w // 2, y + h // 2
        pos_label = get_position_label(cx, cy, img.shape)

        desc = f"A {color_name} lesion located at the {pos_label} of the image."

        rows.append({
            "image_id": img_path.stem,
            "x_min": x,
            "y_min": y,
            "x_max": x + w,
            "y_max": y + h,
            "color": color_name,
            "position": pos_label,
            "description": desc,
            "crop_path": str(crop_path)
        })

        print(f"✅ Processed: {img_path.name} -> {color_name}, {pos_label}")

    except Exception as e:
        print(f"⚠️ Error processing {img_path.name}: {e}")

# === SAVE CSV ===
if rows:
    df = pd.DataFrame(rows)
    df.to_csv(OUTPUT_CSV, index=False)
    print(f"\n✅ Done! Saved {len(df)} lesion crops and labels to {OUTPUT_CSV}")
else:
    print("\n⚠️ No valid lesions found. Please check the input images folder.")

✅ Processed: ISIC_0024334.jpg -> bluish, middle-center
✅ Processed: ISIC_0024386.jpg -> bluish, middle-center
✅ Processed: ISIC_0024392.jpg -> bluish, middle-center
✅ Processed: ISIC_0024398.jpg -> bluish, middle-center
✅ Processed: ISIC_0024406.jpg -> bluish, middle-center
✅ Processed: ISIC_0024445.jpg -> bluish, top-center
✅ Processed: ISIC_0024450.jpg -> bluish, bottom-right
✅ Processed: ISIC_0024452.jpg -> bluish, middle-center
✅ Processed: ISIC_0024455.jpg -> bluish, middle-center
✅ Processed: ISIC_0024460.jpg -> bluish, middle-center
✅ Processed: ISIC_0024488.jpg -> bluish, middle-center
✅ Processed: ISIC_0024521.jpg -> bluish, middle-center
✅ Processed: ISIC_0024538.jpg -> bluish, middle-center
✅ Processed: ISIC_0024542.jpg -> bluish, top-center
✅ Processed: ISIC_0024544.jpg -> bluish, middle-center
✅ Processed: ISIC_0024556.jpg -> bluish, top-center
✅ Processed: ISIC_0024570.jpg -> bluish, middle-center
✅ Processed: ISIC_0024572.jpg -> bluish, middle-center
✅ Processed: ISIC_00