In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the dataset
# and output folders used later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# !tar -xf /content/drive/MyDrive/skin_cancer/dataset.tar -C /content/

In [None]:
!pip install albumentations opencv-python tqdm numpy --quiet

In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import albumentations as A
import random
from google.colab import drive


# CONFIG
# Input: one sub-folder per class. Output: augmented dataset, same layout.
DATA_DIR = "/content/drive/MyDrive/skin_cancer/dataset/train/sorted"
SAVE_DIR = "/content/drive/MyDrive/skin_cancer/final_data"

# Size (in pixels) every saved image is resized to.
IMG_W = 600
IMG_H = 450

# Seed both RNGs used below (NumPy for mixup/cutmix, `random` for sampling).
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# LOAD IMAGE PATHS
# Every visible sub-directory of DATA_DIR is treated as one class. Plain files
# and hidden entries (e.g. ".ipynb_checkpoints") are ignored so they neither
# crash the per-class listing nor show up as empty pseudo-classes.
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')
classes = sorted(
    entry for entry in os.listdir(DATA_DIR)
    if not entry.startswith('.') and os.path.isdir(os.path.join(DATA_DIR, entry))
)

# Map class name -> list of image paths. The paths are sorted because
# os.listdir() returns entries in arbitrary order; without a stable order the
# seeded random.choice() calls further down would not be reproducible.
img_paths = {
    cls: sorted(
        os.path.join(DATA_DIR, cls, f)
        for f in os.listdir(os.path.join(DATA_DIR, cls))
        if f.lower().endswith(IMAGE_EXTENSIONS)
    )
    for cls in classes
}

# Number of original images found per class.
counts = {cls: len(paths) for cls, paths in img_paths.items()}
print("ðŸ“Š Original counts:", counts)

# TARGET AUGMENTATION
# (exclusive upper bound on the original count, target size) pairs, checked in
# ascending order; the first tier whose bound exceeds the count wins.
_TARGET_TIERS = ((200, 2000), (500, 3000), (1000, 3500), (2000, 4000))

targets = {}
for cls, c in counts.items():
    # Classes with 2000 or more originals match no tier and keep their size.
    targets[cls] = next((goal for bound, goal in _TARGET_TIERS if c < bound), c)
print("ðŸŽ¯ Target counts:", targets)

# AUGMENTATION PIPELINE
# Applied to every synthetic sample, in this order:
#   resize -> one geometric op -> one colour op -> one noise/blur op
#   -> coarse dropout -> elastic warp.
# Each OneOf group fires with its own probability `p` and then applies a single
# child transform.
# NOTE(review): RandomRotate90 on a non-square image presumably swaps height
# and width, so samples may leave this pipeline as IMG_W x IMG_H instead of
# IMG_H x IMG_W — confirm whether downstream code relies on a fixed size.
base_aug = A.Compose([
    A.Resize(IMG_H, IMG_W),  # Correct: (height, width)
    # Geometric: flip or quarter-turn rotation.
    A.OneOf([
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
    ], p=0.8),
    # Photometric: colour / contrast perturbation.
    A.OneOf([
        A.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.05),
        A.CLAHE(clip_limit=3.0, p=0.6),
        A.RandomBrightnessContrast(p=0.6)
    ], p=0.8),
    # Degradation: sensor noise or a mild blur.
    A.OneOf([
        A.GaussNoise(p=0.5),
        A.MotionBlur(blur_limit=3, p=0.3),
        A.MedianBlur(blur_limit=3, p=0.3),
    ], p=0.5),
    # The hole count is given with the range-style argument so that it matches
    # `hole_height_range` / `hole_width_range`. The previous legacy `max_holes=4`
    # keyword was mixed with the range-style API and triggered a warning on
    # this call in the recorded run. (1, 4) keeps "at most four holes".
    A.CoarseDropout(num_holes_range=(1, 4), hole_height_range=(0.05, 0.1),
                    hole_width_range=(0.05, 0.1), p=0.4),
    A.ElasticTransform(alpha=10, sigma=5, p=0.3),
])

# MIXUP & CUTMIX
def resize_match(img):
    """Return `img` converted from BGR to RGB and resized to IMG_W x IMG_H.

    Args:
        img: image array in OpenCV's BGR channel order (as returned by
            cv2.imread).

    Returns:
        The RGB image, IMG_W pixels wide and IMG_H pixels high.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(rgb, (IMG_W, IMG_H))

def mixup(img1, img2, alpha=0.4):
    """Blend two images with a Beta(alpha, alpha)-distributed mixing weight.

    Args:
        img1: first image (uint8 array). Its shape defines the output shape.
        img2: second image; resized to img1's width/height if the shapes differ.
        alpha: concentration parameter of the Beta distribution the mixing
            weight is drawn from.

    Returns:
        uint8 array with img1's shape holding lam * img1 + (1 - lam) * img2,
        rounded to the nearest integer.
    """
    if img1.shape != img2.shape:
        # cv2.resize expects (width, height), i.e. (shape[1], shape[0]).
        img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0]))
    lam = np.random.beta(alpha, alpha)
    blended = lam * img1 + (1 - lam) * img2
    # Round before casting: a bare astype(np.uint8) truncates, so float error
    # such as 99.999... would darken pixels by one level (even when both
    # inputs are identical).
    mixed = np.rint(blended).astype(np.uint8)
    return mixed

def cutmix(img1, img2):
    """Return a copy of img1 with one rectangular region taken from img2.

    The rectangle is centred on a random pixel, scaled by sqrt(1 - lam) of the
    image size with lam drawn uniformly from [0.3, 0.7), and clipped to the
    image borders. img1 itself is not modified.

    Args:
        img1: base image, array of shape (height, width, channels).
        img2: donor image; resized to img1's width/height if the shapes differ.

    Returns:
        Array with img1's shape containing the pasted patch.
    """
    if img1.shape != img2.shape:
        img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0]))
    height, width, _ = img1.shape

    # Draw order (ratio, centre x, centre y) is kept so seeded runs match.
    keep_ratio = np.random.uniform(0.3, 0.7)
    center_x = np.random.randint(width)
    center_y = np.random.randint(height)

    patch_scale = np.sqrt(1 - keep_ratio)
    half_w = int(width * patch_scale) // 2
    half_h = int(height * patch_scale) // 2

    # Clip the box so it never reaches outside the image.
    left = np.clip(center_x - half_w, 0, width)
    top = np.clip(center_y - half_h, 0, height)
    right = np.clip(center_x + half_w, 0, width)
    bottom = np.clip(center_y + half_h, 0, height)

    result = img1.copy()
    result[top:bottom, left:right, :] = img2[top:bottom, left:right, :]
    return result

# CREATE SAVE DIRECTORIES
# The output root first, then one sub-folder per class; folders that already
# exist are left untouched.
for out_dir in [SAVE_DIR] + [os.path.join(SAVE_DIR, cls) for cls in classes]:
    os.makedirs(out_dir, exist_ok=True)

# AUGMENTATION LOOP
# For every class:
#   1. write a resized copy of each readable original into SAVE_DIR/<class>;
#   2. generate `targets[cls] - n_original` synthetic images from randomly
#      chosen originals and save them as aug_<i>.jpg.
for cls in classes:
    print(f"\n=== ðŸ§¬ Processing {cls} ===")
    cls_dir = os.path.join(SAVE_DIR, cls)
    imgs = img_paths[cls]
    n_original = len(imgs)
    n_target = int(targets[cls])
    n_to_generate = max(0, n_target - n_original)
    rare_class = n_original < 800  # Apply MixUp/CutMix only for rare classes

    print(f"{cls}: {n_original} â†’ {n_target} (augment {n_to_generate})")

    # Copy originals first. cv2.imread returns None (it does not raise) when a
    # file is missing or corrupt, so such files are reported and left out of
    # the sampling pool instead of crashing cv2.resize / cv2.cvtColor later.
    usable = []
    for path in imgs:
        img = cv2.imread(path)
        if img is None:
            print(f"Skipping unreadable image: {path}")
            continue
        img = cv2.resize(img, (IMG_W, IMG_H))
        cv2.imwrite(os.path.join(cls_dir, os.path.basename(path)), img)
        usable.append(path)

    # random.choice raises IndexError on an empty list; an empty class cannot
    # be augmented, so report it and move on to the next class.
    if n_to_generate and not usable:
        print(f"No readable images for {cls}; skipping augmentation")
        continue

    # Generate augmented images
    for i in tqdm(range(n_to_generate)):
        img_path = random.choice(usable)
        img = resize_match(cv2.imread(img_path))
        aug = base_aug(image=img)['image']

        if rare_class:
            # Combine the augmented sample with a second (un-augmented) image
            # of the same class, using MixUp or CutMix with equal probability.
            img2_path = random.choice(usable)
            img2 = resize_match(cv2.imread(img2_path))
            if random.random() < 0.5:
                aug = mixup(aug, img2)
            else:
                aug = cutmix(aug, img2)

        # The pipeline works in RGB; cv2.imwrite expects BGR.
        aug_bgr = cv2.cvtColor(aug, cv2.COLOR_RGB2BGR)
        cv2.imwrite(os.path.join(cls_dir, f"aug_{i}.jpg"), aug_bgr)

print("\nâœ… All augmentations completed successfully and saved to Google Drive!")

📊 Original counts: {'AKIEC': 327, 'BCC': 514, 'BKL': 1099, 'DF': 239, 'MEL': 1113, 'NV': 6705, 'VASC': 253}
🎯 Target counts: {'AKIEC': 3000, 'BCC': 3500, 'BKL': 4000, 'DF': 3000, 'MEL': 4000, 'NV': 6705, 'VASC': 3000}

=== 🧬 Processing AKIEC ===
AKIEC: 327 → 3000 (augment 2673)


  A.CoarseDropout(max_holes=4, hole_height_range=(0.05, 0.1),
100%|██████████| 2673/2673 [03:37<00:00, 12.30it/s]



=== 🧬 Processing BCC ===
BCC: 514 → 3500 (augment 2986)


100%|██████████| 2986/2986 [03:56<00:00, 12.61it/s]



=== 🧬 Processing BKL ===
BKL: 1099 → 4000 (augment 2901)


100%|██████████| 2901/2901 [02:55<00:00, 16.51it/s]



=== 🧬 Processing DF ===
DF: 239 → 3000 (augment 2761)


100%|██████████| 2761/2761 [04:15<00:00, 10.80it/s]



=== 🧬 Processing MEL ===
MEL: 1113 → 4000 (augment 2887)


100%|██████████| 2887/2887 [02:53<00:00, 16.67it/s]



=== 🧬 Processing NV ===
NV: 6705 → 6705 (augment 0)


0it [00:00, ?it/s]



=== 🧬 Processing VASC ===
VASC: 253 → 3000 (augment 2747)


100%|██████████| 2747/2747 [04:14<00:00, 10.80it/s]


✅ All augmentations completed successfully and saved to Google Drive!



