In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Reshape, LeakyReLU, UpSampling2D, Conv2D, BatchNormalization, Dropout, Flatten
from collections import defaultdict
from tqdm import tqdm

In [None]:
# Enable memory growth on every GPU TensorFlow can see.
# Iterating an empty device list is a no-op, so no separate emptiness check is needed.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

# Hyper-parameters shared by the rest of the notebook
latent_dim = 100      # length of the noise vector fed to the generator
target_count = 1000   # desired number of images per class
img_size = 224        # images are resized to img_size x img_size

# Dataset root folder and the class sub-folders inside it.
# The strings are used verbatim as directory names, so their spelling must
# match the folders on disk.
data_path = 'Skin Cancer Dataset'
categories = [
    'Acitinic Keratosis',
    'Basal Cell Carcinoma',
    'Melanoma',
    'Nevus',
    'Pigmented Benign Keratosis',
    'Seborrheic Keratosis',
]

# Read every class folder into memory as resized uint8 images (values in [0,255]).
# NOTE: cv2.imread returns pixels in BGR channel order; no colour conversion
# is applied here, so everything stored in class_data is BGR.
class_data = {}
for cat in categories:
    folder_path = os.path.join(data_path, cat)
    images = []
    if not os.path.isdir(folder_path):
        # A missing folder is tolerated: the class simply ends up with zero images.
        print(f"Warning: Folder {folder_path} does not exist.")
    else:
        for entry in os.listdir(folder_path):
            img_path = os.path.join(folder_path, entry)
            raw = cv2.imread(img_path)
            if raw is None:
                # imread yields None for anything it cannot decode; skip silently.
                continue
            try:
                images.append(cv2.resize(raw, (img_size, img_size)))
            except Exception as e:
                print(f"Error processing {img_path}: {e}")
    class_data[cat] = images
    print(f"Class {cat}: {len(images)} images loaded.")


# Improved Generator: Upsamples from a 14x14 feature map to 224x224
def build_generator(latent_dim):
    """Build the generator network.

    A dense layer projects the noise vector to a 14x14x256 feature map, which
    is then passed through four UpSampling2D -> Conv2D -> BatchNormalization
    -> LeakyReLU stages (14 -> 28 -> 56 -> 112 -> 224). A final 3-channel
    convolution with a sigmoid keeps the output in [0,1].

    Args:
        latent_dim: length of the input noise vector.

    Returns:
        An uncompiled Keras Sequential model.
    """
    base_size, base_channels = 14, 256

    model = Sequential()
    # Project the noise vector and reshape it into the initial feature map
    model.add(Dense(base_channels * base_size * base_size, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Reshape((base_size, base_size, base_channels)))

    # One entry per upsampling stage: 28x28, 56x56, 112x112, 224x224
    for filters in (256, 128, 64, 32):
        model.add(UpSampling2D())
        model.add(Conv2D(filters, kernel_size=3, padding='same'))
        model.add(BatchNormalization())
        model.add(LeakyReLU(alpha=0.2))

    # Final convolution: 3 channels, output in [0,1]
    model.add(Conv2D(3, kernel_size=3, padding='same', activation='sigmoid'))
    return model

# Improved Discriminator: Remove sigmoid so it outputs a raw score
def build_discriminator(img_shape):
    """Build the critic network used for WGAN training.

    Three stride-2 3x3 convolutions (64, 128, 256 filters), each followed by
    LeakyReLU(0.2) and Dropout(0.4), then a flatten and a single linear unit.
    There is no sigmoid: the model outputs an unbounded score.

    Args:
        img_shape: input image shape, passed as ``input_shape`` to the first
            convolution.

    Returns:
        An uncompiled Keras Sequential model.
    """
    model = Sequential()
    for stage, filters in enumerate((64, 128, 256)):
        # Only the first layer declares the input shape
        first_layer_args = {'input_shape': img_shape} if stage == 0 else {}
        model.add(Conv2D(filters, kernel_size=3, strides=2, padding='same', **first_layer_args))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dropout(0.4))
    model.add(Flatten())
    model.add(Dense(1))  # Linear output for WGAN
    return model


def train_wgan_gp(class_images, latent_dim=latent_dim, epochs=200, batch_size=16, n_critic=5, lambda_gp=10.0):
    """Train a WGAN-GP on the images of one class and return the generator.

    NOTE: an "epoch" here is NOT a full pass over the data. Each epoch runs
    ``n_critic`` critic updates (one batch each) followed by a single
    generator update, so ``epochs`` equals the number of generator steps.

    Args:
        class_images: sequence of images with pixel values in [0,255]; they
            are scaled to [0,1] before training. Each image must have shape
            (img_size, img_size, 3) to match the critic's input.
        latent_dim: length of the generator's noise vector.
        epochs: number of generator updates (see note above).
        batch_size: images per critic/generator step.
        n_critic: critic updates per generator update.
        lambda_gp: weight of the gradient-penalty term in the critic loss.

    Returns:
        The trained generator model.

    Raises:
        ValueError: if ``n_critic`` is less than 1, or if fewer than
            ``batch_size`` images are supplied (with ``drop_remainder=True``
            no batch could be formed and the critic would never be trained).
    """
    if n_critic < 1:
        raise ValueError(f"n_critic must be >= 1, got {n_critic}")
    if len(class_images) < batch_size:
        raise ValueError(
            f"Need at least batch_size={batch_size} images to train, "
            f"got {len(class_images)}"
        )

    img_shape = (img_size, img_size, 3)
    generator = build_generator(latent_dim)
    discriminator = build_discriminator(img_shape)

    d_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)
    g_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)

    # Normalize real images from [0,255] to [0,1] (in place, to avoid a second copy)
    real_images = np.array(class_images, dtype=np.float32)
    real_images /= 255.0

    # A single endlessly repeating, reshuffled stream of full batches. One
    # iterator is reused for the whole run so the shuffle buffer is not
    # rebuilt from scratch for every critic step.
    dataset = (
        tf.data.Dataset.from_tensor_slices(real_images)
        .shuffle(buffer_size=real_images.shape[0])
        .batch(batch_size, drop_remainder=True)
        .repeat()
    )
    batch_iter = iter(dataset)

    @tf.function
    def d_train_step(real_imgs):
        """One critic update; returns (loss, mean real score, mean fake score)."""
        batch_size_ = tf.shape(real_imgs)[0]
        noise = tf.random.normal([batch_size_, latent_dim])
        with tf.GradientTape() as tape:
            fake_imgs = generator(noise, training=True)
            real_validity = discriminator(real_imgs, training=True)
            fake_validity = discriminator(fake_imgs, training=True)

            # Gradient penalty: evaluated on random per-sample interpolations
            # between real and fake images.
            alpha = tf.random.uniform([batch_size_, 1, 1, 1], 0.0, 1.0)
            interpolated = alpha * real_imgs + (1 - alpha) * fake_imgs
            with tf.GradientTape() as tape_gp:
                tape_gp.watch(interpolated)
                interpolated_validity = discriminator(interpolated, training=True)
            grads = tape_gp.gradient(interpolated_validity, [interpolated])[0]
            grads = tf.reshape(grads, [batch_size_, -1])
            # Small epsilon keeps the sqrt differentiable at zero
            grad_norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=1) + 1e-12)
            gp = tf.reduce_mean((grad_norm - 1.0) ** 2)

            d_loss = tf.reduce_mean(fake_validity) - tf.reduce_mean(real_validity) + lambda_gp * gp
        # Only the critic's variables are updated here
        gradients = tape.gradient(d_loss, discriminator.trainable_variables)
        d_optimizer.apply_gradients(zip(gradients, discriminator.trainable_variables))
        return d_loss, tf.reduce_mean(real_validity), tf.reduce_mean(fake_validity)

    @tf.function
    def g_train_step():
        """One generator update; returns the generator loss."""
        noise = tf.random.normal([batch_size, latent_dim])
        with tf.GradientTape() as tape:
            fake_imgs = generator(noise, training=True)
            fake_validity = discriminator(fake_imgs, training=True)
            g_loss = -tf.reduce_mean(fake_validity)
        # Only the generator's variables are updated here
        gradients = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(zip(gradients, generator.trainable_variables))
        return g_loss

    # Training loop
    for epoch in range(epochs):
        # Run n_critic updates for discriminator per generator update
        for _ in range(n_critic):
            d_loss, real_val, fake_val = d_train_step(next(batch_iter))
        g_loss = g_train_step()
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{epochs} | D loss: {d_loss:.4f} | G loss: {g_loss:.4f} | D real: {real_val:.4f} | D fake: {fake_val:.4f}")
    return generator

synthetic_data = defaultdict(list)
synthetic_save_path = "synthetic_data"
os.makedirs(synthetic_save_path, exist_ok=True)

gan_batch_size = 16   # batch size passed to train_wgan_gp
gen_chunk_size = 32   # synthetic images produced per generator forward pass

# For each class, if count < target_count, train WGAN-GP and generate synthetic images.
for cat in categories:
    count = len(class_data[cat])
    if count >= target_count:
        print(f"Class {cat} is sufficiently represented (count: {count}).")
        continue
    if count < gan_batch_size:
        # Not even one full training batch is available (e.g. the class
        # folder was missing), so a GAN cannot be trained for this class.
        print(f"Warning: Class {cat} has only {count} images (need at least {gan_batch_size}); skipping synthetic generation.")
        continue

    diff = target_count - count
    print(f"Class {cat} is underrepresented ({count} images). Generating {diff} synthetic images using WGAN-GP...")
    gen_model = train_wgan_gp(class_data[cat], epochs=200, batch_size=gan_batch_size, n_critic=5, lambda_gp=10.0)
    class_folder = os.path.join(synthetic_save_path, cat)
    os.makedirs(class_folder, exist_ok=True)

    # Generate in chunks: one predict() call per chunk instead of one per image.
    with tqdm(total=diff, desc=f"Generating synthetic for {cat}") as progress:
        for start in range(0, diff, gen_chunk_size):
            chunk = min(gen_chunk_size, diff - start)
            noise = np.random.normal(0, 1, (chunk, latent_dim))
            synth_batch = gen_model.predict(noise, verbose=0)
            # Convert images from [0,1] float to [0,255] uint8
            synth_batch_uint8 = np.uint8(synth_batch * 255)
            for offset, synth_img_uint8 in enumerate(synth_batch_uint8):
                synthetic_data[cat].append(synth_img_uint8)
                # The generator was trained on images loaded with cv2.imread
                # (BGR order), so its output is already BGR, which is what
                # cv2.imwrite expects. Converting RGB->BGR here would swap
                # the red and blue channels in the saved files.
                cv2.imwrite(os.path.join(class_folder, f"synthetic_{start + offset}.png"),
                            synth_img_uint8)
            progress.update(chunk)

# Combine original and synthetic data for augmentation.
# Labels are the index of each class within `categories`.
label_dict = {cat: idx for idx, cat in enumerate(categories)}
augmented_data = []
augmented_labels = []
for cat in categories:
    imgs = class_data[cat]                   # original images in [0,255]
    syn_imgs = synthetic_data.get(cat, [])   # empty when nothing was generated for this class
    total_imgs = imgs + syn_imgs
    augmented_data.extend(total_imgs)
    augmented_labels.extend([label_dict[cat]] * len(total_imgs))

# Convert to NumPy arrays and normalize augmented images to [0,1].
# The division is done in place so that a second full-size float32 copy of
# the whole dataset is not allocated.
augmented_data = np.array(augmented_data, dtype=np.float32)
augmented_data /= 255.0
augmented_labels = np.array(augmented_labels)

print("Data augmentation complete!")
print(f"Augmented dataset size: {augmented_data.shape[0]} images")

Class Acitinic Keratosis: 500 images loaded.
Class Basal Cell Carcinoma: 500 images loaded.
Class Melanoma: 505 images loaded.
Class Nevus: 500 images loaded.
Class Pigmented Benign Keratosis: 500 images loaded.
Class Seborrheic Keratosis: 500 images loaded.
Class Acitinic Keratosis is underrepresented (500 images). Generating 500 synthetic images using WGAN-GP...
Epoch 10/200 | D loss: -72.1756 | G loss: -229.5781 | D real: 314.2914 | D fake: 215.9108
Epoch 20/200 | D loss: -13.9337 | G loss: 26.3479 | D real: 21.5690 | D fake: 3.9101
Epoch 30/200 | D loss: 18.7225 | G loss: -99.0958 | D real: 120.2775 | D fake: 135.5092
Epoch 40/200 | D loss: -0.2838 | G loss: -35.1240 | D real: 24.1869 | D fake: 23.4068
Epoch 50/200 | D loss: -3.4967 | G loss: 68.1900 | D real: -44.2136 | D fake: -47.8445
Epoch 60/200 | D loss: -2.9592 | G loss: 36.9135 | D real: -47.8217 | D fake: -50.9026
Epoch 70/200 | D loss: -14.3288 | G loss: 105.3916 | D real: -102.2248 | D fake: -117.4342
Epoch 80/200 | D lo

Generating synthetic for Acitinic Keratosis: 100%|██████████| 500/500 [00:31<00:00, 15.83it/s]


Class Basal Cell Carcinoma is underrepresented (500 images). Generating 500 synthetic images using WGAN-GP...
Epoch 10/200 | D loss: -70.8176 | G loss: -160.3927 | D real: 227.6046 | D fake: 134.3074
Epoch 20/200 | D loss: -49.5050 | G loss: -243.3641 | D real: 314.1735 | D fake: 250.2377
Epoch 30/200 | D loss: -18.0581 | G loss: -185.6517 | D real: 211.6005 | D fake: 188.7256
Epoch 40/200 | D loss: -7.9396 | G loss: -16.4537 | D real: 46.6399 | D fake: 37.2425
Epoch 50/200 | D loss: -2.3608 | G loss: -31.3196 | D real: 41.8118 | D fake: 39.2409
Epoch 60/200 | D loss: -5.8494 | G loss: 9.7903 | D real: 3.6513 | D fake: -2.3068
Epoch 70/200 | D loss: -6.9012 | G loss: -26.5474 | D real: 20.6298 | D fake: 13.7262
Epoch 80/200 | D loss: -16.3920 | G loss: 159.5298 | D real: -148.0258 | D fake: -166.6843
Epoch 90/200 | D loss: -6.9733 | G loss: -92.5928 | D real: 105.6634 | D fake: 97.9364
Epoch 100/200 | D loss: -12.0510 | G loss: 54.3651 | D real: -53.9524 | D fake: -68.9087
Epoch 110/20

Generating synthetic for Basal Cell Carcinoma: 100%|██████████| 500/500 [00:34<00:00, 14.60it/s]


Class Melanoma is underrepresented (505 images). Generating 495 synthetic images using WGAN-GP...
Epoch 10/200 | D loss: -66.4920 | G loss: 22.3503 | D real: 64.7679 | D fake: -30.8244
Epoch 20/200 | D loss: -22.4554 | G loss: -171.9263 | D real: 207.1654 | D fake: 174.7980
Epoch 30/200 | D loss: -6.6718 | G loss: -53.8908 | D real: 52.7818 | D fake: 44.5159
Epoch 40/200 | D loss: -2.3597 | G loss: 40.9251 | D real: -33.7176 | D fake: -36.6604
Epoch 50/200 | D loss: -1.6278 | G loss: 16.8081 | D real: 5.7616 | D fake: 3.9721
Epoch 60/200 | D loss: 2.7489 | G loss: -161.0603 | D real: 153.6126 | D fake: 156.0046
Epoch 70/200 | D loss: 5.0018 | G loss: -21.5962 | D real: 9.2065 | D fake: 14.1988
Epoch 80/200 | D loss: -2.6697 | G loss: -6.0928 | D real: 21.9980 | D fake: 19.2773
Epoch 90/200 | D loss: -2.7046 | G loss: 1.7367 | D real: -6.2399 | D fake: -8.9578
Epoch 100/200 | D loss: -3.7322 | G loss: 45.4231 | D real: -30.1774 | D fake: -34.1938
Epoch 110/200 | D loss: -1.6521 | G loss

Generating synthetic for Melanoma: 100%|██████████| 495/495 [01:30<00:00,  5.50it/s]


Class Nevus is underrepresented (500 images). Generating 500 synthetic images using WGAN-GP...
Epoch 10/200 | D loss: -118.2937 | G loss: -591.7361 | D real: 808.9854 | D fake: 613.9547
Epoch 20/200 | D loss: -107.5850 | G loss: -467.2721 | D real: 559.3958 | D fake: 416.6608
Epoch 30/200 | D loss: -32.8508 | G loss: -205.6419 | D real: 263.1173 | D fake: 213.3132
Epoch 40/200 | D loss: -39.5551 | G loss: -113.0780 | D real: 179.9140 | D fake: 124.7859
Epoch 50/200 | D loss: -34.9384 | G loss: -35.5683 | D real: 95.4747 | D fake: 43.8785
Epoch 60/200 | D loss: -44.4388 | G loss: -93.8027 | D real: 141.0623 | D fake: 78.9300
Epoch 70/200 | D loss: -37.2540 | G loss: -29.6684 | D real: 108.3107 | D fake: 54.9095
Epoch 80/200 | D loss: -31.7776 | G loss: -148.3019 | D real: 172.0252 | D fake: 126.7090
Epoch 90/200 | D loss: -12.9466 | G loss: -150.0097 | D real: 150.4559 | D fake: 135.7314
Epoch 100/200 | D loss: 0.2004 | G loss: -107.6481 | D real: 106.3625 | D fake: 106.5011
Epoch 110/2

Generating synthetic for Nevus: 100%|██████████| 500/500 [01:28<00:00,  5.62it/s]


Class Pigmented Benign Keratosis is underrepresented (500 images). Generating 500 synthetic images using WGAN-GP...
Epoch 10/200 | D loss: -97.9839 | G loss: -496.4385 | D real: 677.6969 | D fake: 515.7761
Epoch 20/200 | D loss: -35.4611 | G loss: -365.9973 | D real: 444.9709 | D fake: 386.2709
Epoch 30/200 | D loss: -10.5093 | G loss: -251.9979 | D real: 282.9178 | D fake: 266.9203
Epoch 40/200 | D loss: -11.0351 | G loss: -153.8676 | D real: 162.9816 | D fake: 151.2151
Epoch 50/200 | D loss: -5.4445 | G loss: -199.2551 | D real: 211.6152 | D fake: 204.3473
Epoch 60/200 | D loss: -8.0212 | G loss: 46.7191 | D real: -33.9126 | D fake: -42.5141
Epoch 70/200 | D loss: -6.5037 | G loss: -13.0667 | D real: 21.0659 | D fake: 13.2809
Epoch 80/200 | D loss: 1.6969 | G loss: -97.7358 | D real: 116.4653 | D fake: 117.5272
Epoch 90/200 | D loss: -1.8640 | G loss: -100.7233 | D real: 121.3928 | D fake: 119.5195
Epoch 100/200 | D loss: 3.9728 | G loss: -59.5882 | D real: 69.0770 | D fake: 71.9130


Generating synthetic for Pigmented Benign Keratosis: 100%|██████████| 500/500 [00:29<00:00, 16.96it/s]


Class Seborrheic Keratosis is underrepresented (500 images). Generating 500 synthetic images using WGAN-GP...
Epoch 10/200 | D loss: -89.6475 | G loss: -192.6721 | D real: 370.7676 | D fake: 225.5564
Epoch 20/200 | D loss: -63.3904 | G loss: -61.6199 | D real: 107.4305 | D fake: 23.4460
Epoch 30/200 | D loss: -15.6448 | G loss: -132.6700 | D real: 177.6565 | D fake: 152.4722
Epoch 40/200 | D loss: -5.6655 | G loss: -71.5793 | D real: 73.4076 | D fake: 63.0964
Epoch 50/200 | D loss: -2.0773 | G loss: 31.9673 | D real: -49.3456 | D fake: -51.5914
Epoch 60/200 | D loss: -2.2983 | G loss: -12.9114 | D real: 29.3045 | D fake: 26.8682
Epoch 70/200 | D loss: -5.0559 | G loss: 16.8556 | D real: -35.7695 | D fake: -41.1390
Epoch 80/200 | D loss: -14.1765 | G loss: 61.8498 | D real: -13.4850 | D fake: -28.8504
Epoch 90/200 | D loss: -6.3774 | G loss: -67.3154 | D real: 81.1562 | D fake: 74.7717
Epoch 100/200 | D loss: -2.3967 | G loss: 71.3788 | D real: -62.1898 | D fake: -64.6292
Epoch 110/200 

Generating synthetic for Seborrheic Keratosis: 100%|██████████| 500/500 [00:31<00:00, 15.84it/s]


Data augmentation complete!
Augmented dataset size: 6000 images
