In [5]:
import os
import shutil
import random

# Original dataset paths
image_dir = "data/archive/ISIC2018_Task1-2_Training_Input"
mask_dir = "data/archive/ISIC2018_Task1_Training_GroundTruth"

# New subset folder
subset_image_dir = "subset/images"
subset_mask_dir = "subset/masks"

os.makedirs(subset_image_dir, exist_ok=True)
os.makedirs(subset_mask_dir, exist_ok=True)

# Choose subset size
SUBSET_SIZE = 1000   # change to 500 / 1500 if needed

# Fixed seed: re-running this cell selects the same files again, so the
# subset folders do not keep growing with a different random draw each time.
SUBSET_SEED = 42


def _mask_name_for(img_name):
    """Return the ground-truth mask filename paired with an image filename."""
    return os.path.splitext(img_name)[0] + "_segmentation.png"


# Get all image filenames (sorted so the seeded draw below is deterministic)
all_images = sorted(os.listdir(image_dir))

# Keep only the entries that have a corresponding mask on disk
images_with_masks = [
    img_name
    for img_name in all_images
    if os.path.exists(os.path.join(mask_dir, _mask_name_for(img_name)))
]

print(f"Found {len(images_with_masks)} images with corresponding masks")

# Select a reproducible random subset from the images that have masks
selected_images = random.Random(SUBSET_SEED).sample(
    images_with_masks, min(SUBSET_SIZE, len(images_with_masks))
)

# Files left over from an earlier run with a different selection would silently
# end up in the training data, so report them instead of ignoring them.
stale_images = sorted(set(os.listdir(subset_image_dir)) - set(selected_images))
if stale_images:
    print(
        f"Warning: {len(stale_images)} file(s) already in {subset_image_dir} "
        "are not part of this selection"
    )

for img_name in selected_images:
    mask_name = _mask_name_for(img_name)

    # Copy image
    shutil.copy(
        os.path.join(image_dir, img_name),
        os.path.join(subset_image_dir, img_name)
    )

    # Copy corresponding mask
    shutil.copy(
        os.path.join(mask_dir, mask_name),
        os.path.join(subset_mask_dir, mask_name)
    )

print("Subset created successfully!")
print("Images copied:", len(os.listdir(subset_image_dir)))
print("Masks copied:", len(os.listdir(subset_mask_dir)))

Found 2594 images with corresponding masks
Subset created successfully!
Images copied: 1000
Masks copied: 1000


## WEEK 2

In [7]:
import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models

In [8]:
IMG_SIZE = 256

def load_data(image_path, mask_path):
    """Load every image in `image_path` together with its segmentation mask.

    For each file in `image_path` (sorted by name) the mask is looked up in
    `mask_path` as `<image stem>_segmentation.png`.

    Args:
        image_path: Directory containing the input images.
        mask_path: Directory containing the matching mask files.

    Returns:
        A tuple `(images, masks)` of float32 arrays scaled to [0, 1] with
        shapes `(N, IMG_SIZE, IMG_SIZE, 3)` (RGB channel order) and
        `(N, IMG_SIZE, IMG_SIZE, 1)`.

    Raises:
        ValueError: If an image or its mask cannot be read by OpenCV.
    """
    images = []
    masks = []

    for img_name in sorted(os.listdir(image_path)):
        img_file = os.path.join(image_path, img_name)
        mask_file = os.path.join(
            mask_path, os.path.splitext(img_name)[0] + "_segmentation.png"
        )

        # cv2.imread signals failure by returning None rather than raising,
        # so check explicitly instead of crashing later inside cv2.resize.
        img = cv2.imread(img_file)
        if img is None:
            raise ValueError(f"Could not read image file: {img_file}")
        mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise ValueError(f"Could not read mask file: {mask_file}")

        # OpenCV decodes to BGR; convert so matplotlib (RGB) shows true colours.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

        # Nearest-neighbour keeps mask pixels at their original label values
        # instead of blending them along the lesion border.
        mask = cv2.resize(
            mask, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_NEAREST
        )

        # float32 halves the memory of the default float64 division result.
        images.append(img.astype(np.float32) / 255.0)
        masks.append(np.expand_dims(mask.astype(np.float32) / 255.0, axis=-1))

    return np.array(images, dtype=np.float32), np.array(masks, dtype=np.float32)

# Read the whole subset into memory, then report the resulting array shapes.
X, y = load_data(image_path="subset/images", mask_path="subset/masks")

print(f"Images shape: {X.shape}")
print(f"Masks shape: {y.shape}")

Images shape: (1000, 256, 256, 3)
Masks shape: (1000, 256, 256, 1)


In [9]:
# Hold out 20% of the samples for validation; the fixed seed keeps the split stable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
def dice_coef(y_true, y_pred):
    """Soft Dice coefficient between a ground-truth mask and a prediction.

    Both inputs are cast to float32 before use, so the function can be called
    directly with NumPy arrays of different float types (for example float64
    masks and float32 model outputs) as well as from inside Keras.

    Args:
        y_true: Ground-truth mask values (array or tensor).
        y_pred: Predicted mask values with the same number of elements.

    Returns:
        A scalar float32 tensor: (2 * sum(t * p) + s) / (sum(t) + sum(p) + s),
        where s is a small smoothing term that avoids division by zero.
    """
    smooth = 1e-6
    # Flatten so the sums run over every pixel regardless of input rank.
    y_true_f = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    y_pred_f = tf.reshape(tf.cast(y_pred, tf.float32), [-1])
    intersection = tf.reduce_sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (
        tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth
    )

In [11]:
def build_unet():
    """Build a small U-Net with two down-sampling stages.

    The network takes `(IMG_SIZE, IMG_SIZE, 3)` inputs and produces a
    single-channel sigmoid map of the same spatial size.

    Returns:
        An uncompiled Keras `Model`.
    """
    def conv_pair(tensor, filters):
        # Two stacked 3x3 ReLU convolutions with 'same' padding.
        for _ in range(2):
            tensor = layers.Conv2D(filters, 3, activation='relu', padding='same')(tensor)
        return tensor

    def up_and_merge(tensor, skip):
        # Upsample by 2 and concatenate with the matching encoder feature map.
        return layers.concatenate([layers.UpSampling2D()(tensor), skip])

    inputs = layers.Input((IMG_SIZE, IMG_SIZE, 3))

    # Encoder
    enc1 = conv_pair(inputs, 64)
    enc2 = conv_pair(layers.MaxPooling2D()(enc1), 128)
    bottleneck = conv_pair(layers.MaxPooling2D()(enc2), 256)

    # Decoder
    dec2 = conv_pair(up_and_merge(bottleneck, enc2), 128)
    dec1 = conv_pair(up_and_merge(dec2, enc1), 64)

    # 1x1 convolution maps the features to a per-pixel value in (0, 1).
    outputs = layers.Conv2D(1, 1, activation='sigmoid')(dec1)

    return models.Model(inputs, outputs)

# Baseline model: Adam optimizer, binary cross-entropy loss, Dice tracked as a metric.
model = build_unet()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[dice_coef])

model.summary()

In [None]:
# Train the baseline for 15 epochs, evaluating on the validation split after each one.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=15,
    validation_data=(X_val, y_val),
)

Epoch 1/15
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m721s[0m 14s/step - dice_coef: 0.3017 - loss: 0.5261 - val_dice_coef: 0.4260 - val_loss: 0.4212
Epoch 2/15
[1m46/50[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m57s[0m 14s/step - dice_coef: 0.4275 - loss: 0.3890 

In [None]:
def iou_score(y_true, y_pred):
    """Intersection-over-Union between masks and thresholded predictions.

    Both inputs are cast to float32 first, so float64 NumPy masks can be mixed
    with float32 model outputs without a dtype error.

    Args:
        y_true: Ground-truth mask values (array or tensor).
        y_pred: Predicted values; entries above 0.5 count as foreground.

    Returns:
        A scalar float32 tensor, computed over all elements at once.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(tf.cast(y_pred, tf.float32) > 0.5, tf.float32)
    intersection = tf.reduce_sum(y_true * y_pred)
    union = tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) - intersection
    # Small epsilon keeps the ratio defined when both masks are empty.
    return intersection / (union + 1e-6)

preds = model.predict(X_val)

# Pass the masks as float32 so they match the dtype of the model output;
# mixing a float64 mask with float32 predictions raises a dtype error in TF.
print("Validation IoU:", iou_score(y_val.astype(np.float32), preds).numpy())

In [None]:
def show_prediction(index):
    """Plot one validation sample: input image, ground-truth mask, baseline prediction.

    Args:
        index: Position of the sample within `X_val` / `y_val` / `preds`.
    """
    plt.figure(figsize=(12,4))

    # (title, data, colormap) for each of the three side-by-side panels.
    panels = (
        ("Original", X_val[index], None),
        ("Ground Truth", y_val[index].squeeze(), 'gray'),
        ("Prediction", preds[index].squeeze(), 'gray'),
    )
    for position, (title, data, cmap) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.imshow(data, cmap=cmap)
        plt.title(title)

    plt.show()

show_prediction(5)

## Brightness Augmentation

In [None]:
# Brightness-only augmenter: each transform draws a factor from [0.6, 1.4].
# NOTE(review): Keras applies brightness shifts through an 8-bit PIL image, so
# this presumably expects 0-255 pixel values -- confirm before using it on
# images that were already scaled to [0, 1].
brightness_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    brightness_range=[0.6, 1.4]
)

# `datagen` is kept as an alias of the more descriptive name above.
datagen = brightness_datagen


In [None]:
def segmentation_generator(images, masks, batch_size, brightness_range=(0.6, 1.4)):
    """Yield endless (augmented_images, masks) batches for segmentation training.

    Each batch is drawn by sampling `batch_size` indices at random (with
    replacement). Every image in the batch is multiplied by its own brightness
    factor drawn uniformly from `brightness_range` and clipped to [0, 1];
    masks are returned unchanged because brightness does not move pixels.

    The brightness change is done directly in NumPy, so the generator needs no
    external augmenter object and works on float images without an 8-bit
    round trip.

    Args:
        images: NumPy array of images, first axis is the sample axis.
            Assumed to be scaled to [0, 1], since results are clipped to that range.
        masks: NumPy array of masks with the same first-axis length.
        batch_size: Number of samples per yielded batch.
        brightness_range: `(low, high)` bounds of the multiplicative factor.

    Yields:
        Tuples `(augmented_images, batch_masks)`; `augmented_images` keeps the
        dtype of `images`.

    Raises:
        ValueError: On first iteration, if `images` is empty or its length
            differs from `masks`.
    """
    if len(images) != len(masks):
        raise ValueError(
            f"images and masks differ in length: {len(images)} != {len(masks)}"
        )
    if len(images) == 0:
        raise ValueError("cannot generate batches from an empty dataset")

    low, high = brightness_range

    while True:
        idx = np.random.randint(0, len(images), batch_size)

        batch_images = images[idx]
        batch_masks = masks[idx]

        # One factor per image, shaped to broadcast over all remaining axes.
        factor_shape = (batch_size,) + (1,) * (batch_images.ndim - 1)
        factors = np.random.uniform(low, high, size=factor_shape)

        # Apply brightness augmentation only to images
        augmented_images = np.clip(batch_images * factors, 0.0, 1.0).astype(
            batch_images.dtype, copy=False
        )

        yield augmented_images, batch_masks

In [None]:
batch_size = 16

train_gen = segmentation_generator(X_train, y_train, batch_size)

# Train a separate, freshly initialised network on the augmented stream.
# Continuing to fit the baseline `model` would overwrite the baseline and make
# the baseline-vs-augmentation comparison meaningless; the evaluation cells
# also expect the augmented network under the name `model_aug`.
model_aug = build_unet()
model_aug.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[dice_coef]
)

history_aug = model_aug.fit(
    train_gen,
    steps_per_epoch=len(X_train)//batch_size,
    validation_data=(X_val, y_val),
    epochs=15
)

In [None]:
preds_aug = model_aug.predict(X_val)

# Match the mask dtype to the float32 predictions; TensorFlow refuses to mix
# float64 and float32 operands inside the metric functions.
y_val_f32 = y_val.astype(np.float32)

# Dice is averaged per image; IoU is computed over the whole validation set at once.
dice_aug = np.mean(
    [dice_coef(y_val_f32[i], preds_aug[i]).numpy() for i in range(len(y_val_f32))]
)
iou_aug = iou_score(y_val_f32, preds_aug).numpy()

print("Brightness Dice:", dice_aug)
print("Brightness IoU:", iou_aug)

In [None]:
def show_brightness_prediction(index):
    """Plot one validation sample next to the brightness-augmented model's output.

    Args:
        index: Position of the sample within `X_val` / `y_val` / `preds_aug`.
    """
    plt.figure(figsize=(12,4))

    # Original image, ground-truth mask, brightness-augmented model prediction.
    panels = (
        ("Original", X_val[index], None),
        ("Ground Truth", y_val[index].squeeze(), 'gray'),
        ("Brightness Prediction", preds_aug[index].squeeze(), 'gray'),
    )
    for position, (title, data, cmap) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.imshow(data, cmap=cmap)
        plt.title(title)
        plt.axis("off")

    plt.tight_layout()
    plt.show()

show_brightness_prediction(5)

In [None]:
# Compute the baseline numbers the same way as the brightness ones (per-image
# mean Dice, whole-set IoU) so the two rows are directly comparable. The
# last-epoch Keras metric is a batch-level average and is not the same quantity.
y_val_f32 = y_val.astype(np.float32)  # match the float32 predictions
baseline_dice = np.mean(
    [dice_coef(y_val_f32[i], preds[i]).numpy() for i in range(len(y_val_f32))]
)

print("Baseline Dice:", baseline_dice)
print("Baseline IoU:", iou_score(y_val_f32, preds).numpy())

print("Brightness Dice:", dice_aug)
print("Brightness IoU:", iou_aug)

In [None]:
def show_overlay_comparison(index):
    """Show one validation image with ground truth and both models' masks overlaid.

    Four panels are drawn side by side: the plain image, then the same image
    with the ground truth, the baseline prediction and the brightness-model
    prediction blended on top using the 'jet' colormap at 50% opacity.

    Args:
        index: Position of the sample within `X_val` / `y_val` / `preds` / `preds_aug`.
    """
    plt.figure(figsize=(16,4))

    original = X_val[index]

    # (title, overlay) per panel; None means the image is shown without overlay.
    panels = (
        ("Original", None),
        ("GT Overlay", y_val[index].squeeze()),
        ("Baseline Overlay", preds[index].squeeze()),
        ("Brightness Overlay", preds_aug[index].squeeze()),
    )
    for position, (title, overlay) in enumerate(panels, start=1):
        plt.subplot(1, 4, position)
        plt.imshow(original)
        if overlay is not None:
            plt.imshow(overlay, cmap='jet', alpha=0.5)
        plt.title(title)
        plt.axis("off")

    plt.tight_layout()
    plt.show()

show_overlay_comparison(5)