<a href="https://colab.research.google.com/github/aishamohamed/GAN-Benchmarck/blob/main/classify.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DCGAN-Augmented Melanoma Classification

**Author:** Aisha Mohamed  
**Supervisor:** Reihaneh Tarlani  
**Course:** ISIC-2017 GAN Benchmarking (Bachelor’s Thesis, Spring 2025)  

This notebook implements an end-to-end pipeline for augmenting a ResNet-18 skin lesion classifier with DCGAN-generated synthetic melanoma images. We compare baseline (real-only) performance with augmented training using 200 and 500 synthetic samples.

## Environment Setup

In [32]:
#imports
import os, glob
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader, Subset, ConcatDataset
from torchvision import transforms as T, datasets
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.optim as optim
from pathlib import Path
from torchvision import models
import torch.nn.functional as F
import torchvision.utils as vutils
from sklearn.metrics import classification_report
import random


# Seed the Python, NumPy and PyTorch random number generators with one value.
SEED = 42
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(SEED)

# Use the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Running on:", device)


Running on: cuda


## Paths & Hyperparameters

In [33]:
# filesystem & paths
from pathlib import Path
# Every dataset folder is resolved relative to BASE.
BASE     = Path('/content/drive/MyDrive/ISIC2017')
REAL_ROOT= BASE / 'by_class_128'        # real training images, one sub-folder per class
SYN_ROOT = BASE / 'synthetic_melanoma'  # DCGAN-generated images
TEST_ROOT= BASE / 'test_by_class'       # held-out test images

# classifier hyperparameters
image_size = 128
batch_size = 32
num_epochs = 5
lr         = 1e-4

# DCGAN generator hyperparameters (defined once; read by the Generator class)
nz  = 100   # latent vector size (adjust if your DCGAN differs)
ngf = 64    # generator feature-map base size
nc  = 3     # number of image channels (RGB)


## Data Transforms & Loaders

In [34]:
# Preprocessing shared by every dataset: resize and centre-crop to
# image_size, convert to a tensor, then normalise each channel with the
# standard ImageNet mean / std.
clf_tf = T.Compose([
    T.Resize(image_size),
    T.CenterCrop(image_size),
    T.ToTensor(),
    T.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

# Keyword arguments common to all loaders below.
_loader_opts = dict(batch_size=batch_size, num_workers=2)

# Real training images only.
real_ds     = datasets.ImageFolder(root=str(REAL_ROOT), transform=clf_tf)
real_loader = DataLoader(real_ds, shuffle=True, **_loader_opts)

# Synthetic images only (whatever is currently stored under SYN_ROOT).
syn_ds      = datasets.ImageFolder(root=str(SYN_ROOT), transform=clf_tf)
syn_loader  = DataLoader(syn_ds, shuffle=True, **_loader_opts)

# Real + all synthetic images concatenated.
aug_ds      = ConcatDataset([real_ds, syn_ds])
aug_loader  = DataLoader(aug_ds, shuffle=True, **_loader_opts)

# Held-out test set; iteration order is kept fixed.
test_ds     = datasets.ImageFolder(root=str(TEST_ROOT), transform=clf_tf)
test_loader = DataLoader(test_ds, shuffle=False, **_loader_opts)

# Summary of dataset sizes.
print("Real classes:", real_ds.classes, "=>", len(real_ds))
print("Synth  classes:", syn_ds.classes, "=>", len(syn_ds))
print("Augmented total:", len(aug_ds))
print("Test   total:", len(test_ds))


Real classes: ['melanoma', 'nevus', 'seborrheic_keratosis'] => 2000
Synth  classes: ['melanoma'] => 1000
Augmented total: 3000
Test   total: 600


## DCGAN Generator Definition & Loading

In [35]:
# Define DCGAN Generator (layer layout must match the saved checkpoint)
class Generator(nn.Module):
    """DCGAN generator built from five transposed-convolution stages.

    The notebook-level constants ``nz`` (latent size), ``ngf`` (feature-map
    base width) and ``nc`` (output channels) are read when an instance is
    constructed.

    Input:  tensor of shape (N, nz, 1, 1).
    Output: tensor of shape (N, nc, 64, 64) with values in [-1, 1] (Tanh).
    """

    def __init__(self):
        super().__init__()
        self.main = nn.Sequential(
            # nz × 1 × 1 → ngf*8 × 4 × 4
            nn.ConvTranspose2d(nz, ngf*8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf*8), nn.ReLU(True),
            # → ngf*4 × 8 × 8
            nn.ConvTranspose2d(ngf*8, ngf*4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf*4), nn.ReLU(True),
            # → ngf*2 × 16 × 16
            nn.ConvTranspose2d(ngf*4, ngf*2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf*2), nn.ReLU(True),
            # → ngf × 32 × 32
            nn.ConvTranspose2d(ngf*2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf), nn.ReLU(True),
            # → nc × 64 × 64 (use the configured channel count, not a literal 3)
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh()
        )

    def forward(self, x):
        """Map a batch of latent vectors ``x`` to a batch of images."""
        return self.main(x)


# Instantiate & load saved weights
generator = Generator().to(device)

# Build the checkpoint path from BASE so the Drive root is defined in one
# place only; kept as a str, as before.
gen_path = str(BASE / 'models' / 'dcgan_generator_final.pth')
# map_location remaps the stored tensors onto whichever device is in use.
state    = torch.load(gen_path, map_location=device)
generator.load_state_dict(state)

# The generator is only sampled from in this notebook, never trained.
generator.eval()
print("Loaded DCGAN generator from", gen_path)



Loaded DCGAN generator from /content/drive/MyDrive/ISIC2017/models/dcgan_generator_final.pth


## Define Classifier & Save Initial Weights

In [36]:
def make_classifier(num_classes):
    """Build a pretrained ResNet-18 whose final layer predicts ``num_classes``.

    Args:
        num_classes: number of output classes for the replaced ``fc`` layer.

    Returns:
        The model, moved to the notebook-level ``device``.
    """
    # ``pretrained=True`` is deprecated in recent torchvision releases in
    # favour of the ``weights=`` enum. Use the enum when this torchvision
    # provides it (IMAGENET1K_V1 is what ``pretrained=True`` selected) and
    # fall back to the legacy flag otherwise.
    weights_enum = getattr(models, 'ResNet18_Weights', None)
    if weights_enum is not None:
        model = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.resnet18(pretrained=True)
    # Swap the 1000-way ImageNet head for a freshly initialised one.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model.to(device)

import torch.optim as optim

# Loss shared by every training run in this notebook.
criterion = nn.CrossEntropyLoss()

# Build the classifier and snapshot its starting weights so that each
# experiment can be reset to the same starting point.
model = make_classifier(num_classes=len(real_ds.classes))
torch.save(obj=model.state_dict(), f='/content/initial_resnet18.pth')

optimizer = optim.Adam(params=model.parameters(), lr=lr)




## Training & Evaluation Routines

In [37]:
def train_one_epoch(dataloader):
    """Train the notebook-level ``model`` for one pass over ``dataloader``.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion`` and
    ``device``.

    Args:
        dataloader: iterable yielding ``(images, labels)`` tensor batches.

    Returns:
        The mean loss per sample, averaged over the samples actually seen.

    Raises:
        ValueError: if the loader yields no samples.
    """
    model.train()
    total_loss = 0.0
    n_seen = 0
    for imgs, labs in dataloader:
        imgs, labs = imgs.to(device), labs.to(device)
        optimizer.zero_grad()
        logits = model(imgs)
        loss   = criterion(logits, labs)
        loss.backward()
        optimizer.step()
        # criterion returns a batch mean; weight it by the batch size so the
        # final average is per sample even when the last batch is smaller.
        batch_n = imgs.size(0)
        total_loss += loss.item() * batch_n
        n_seen += batch_n
    if n_seen == 0:
        raise ValueError("train_one_epoch received a dataloader with no samples")
    # Divide by samples processed rather than len(dataloader.dataset): the
    # two differ when the loader drops or sub-samples items.
    return total_loss / n_seen

from sklearn.metrics import classification_report

@torch.no_grad()
def evaluate(loader):
    """Collect labels and argmax predictions of the notebook-level ``model``.

    Puts ``model`` in eval mode and runs it, with gradients disabled, on
    every batch of ``loader``.

    Args:
        loader: iterable yielding ``(images, labels)`` tensor batches.

    Returns:
        ``(labels, predictions)`` as two flat Python lists of ints, in the
        order the loader produced them.
    """
    model.eval()
    truth, predicted = [], []
    for batch_imgs, batch_labels in loader:
        scores = model(batch_imgs.to(device))
        predicted += torch.argmax(scores, dim=1).cpu().tolist()
        truth += batch_labels.tolist()
    return truth, predicted


## Baseline (Real-Only) Training & Eval

In [18]:
# Reset the classifier to its saved initial weights and start a fresh Adam.
# map_location=device remaps the checkpoint onto the current device, matching
# how the augmented runs reload the same file.
model.load_state_dict(torch.load('/content/initial_resnet18.pth', map_location=device))
optimizer = optim.Adam(model.parameters(), lr=lr)

# Train on real images only.
print("=> Baseline (real-only) training")
for epoch in range(1, num_epochs+1):
    loss = train_one_epoch(real_loader)
    print(f"[Baseline] Epoch {epoch}/{num_epochs}  Loss: {loss:.4f}")

# Per-class precision / recall / F1 on the held-out test set.
y_true, y_pred = evaluate(test_loader)
print("\nBaseline test report:")
print(classification_report(y_true, y_pred, target_names=real_ds.classes))


→ Baseline (real-only) training
[Baseline] Epoch 1/5  Loss: 0.6881
[Baseline] Epoch 2/5  Loss: 0.2142
[Baseline] Epoch 3/5  Loss: 0.0660
[Baseline] Epoch 4/5  Loss: 0.0230
[Baseline] Epoch 5/5  Loss: 0.0124

Baseline test report:
                      precision    recall  f1-score   support

            melanoma       0.55      0.31      0.40       117
               nevus       0.73      0.88      0.80       393
seborrheic_keratosis       0.45      0.29      0.35        90

            accuracy                           0.68       600
           macro avg       0.58      0.49      0.51       600
        weighted avg       0.65      0.68      0.65       600



In [19]:
import torchvision.utils as vutils

# NOTE(review): the data-loading cell reads SYN_ROOT before this cell writes
# to it, so `syn_ds` built there does not include the files saved here.
out_dir = SYN_ROOT / 'melanoma'
out_dir.mkdir(exist_ok=True, parents=True)

n_generate = 500   # number of synthetic melanoma images to write
gen_batch  = 100   # images per generator forward pass (bounds peak memory)

generator.eval()
noise = torch.randn(n_generate, nz, 1, 1, device=device)
# Sampling needs no gradients: no_grad stops autograd from recording the
# forward pass, and chunking keeps only one sub-batch on the device at a time.
with torch.no_grad():
    fake_imgs = torch.cat([generator(chunk).cpu() for chunk in noise.split(gen_batch)])
fake_imgs = (fake_imgs + 1) / 2     # rescale [-1,1]→[0,1]

for i, img in enumerate(fake_imgs):
    vutils.save_image(img, out_dir / f"dcgan_{i:03d}.png")
print(f" Generated {n_generate} synthetic melanoma images")


 Generated 500 synthetic melanoma images


## Augmentation Subsampling Helper

In [42]:
from torch.utils.data import Subset

def make_aug_loader(n_syn: int) -> DataLoader:
    """Build a shuffled loader over all real images plus ``n_syn`` synthetic ones.

    The synthetic folder is re-scanned on every call and ``n_syn`` images are
    drawn from it without replacement using NumPy's global RNG.

    Args:
        n_syn: number of synthetic images to add to ``real_ds``.

    Returns:
        A ``DataLoader`` over the concatenated real + synthetic dataset.

    Raises:
        ValueError: if ``n_syn`` exceeds the number of synthetic images, or
            if a synthetic class folder maps to a different label index than
            the class of the same name in ``real_ds``.
    """
    full_syn = datasets.ImageFolder(str(SYN_ROOT), transform=clf_tf)

    # ImageFolder assigns label indices per root folder, so a synthetic
    # class is labelled correctly only if it gets the same index as in
    # real_ds. Check that explicitly instead of relying on folder names
    # happening to sort the same way.
    for cls_name, syn_idx in full_syn.class_to_idx.items():
        real_idx = real_ds.class_to_idx.get(cls_name)
        if real_idx != syn_idx:
            raise ValueError(
                f"Synthetic class '{cls_name}' has label {syn_idx} but "
                f"real_ds maps it to {real_idx}"
            )

    N = len(full_syn)
    if n_syn > N:
        raise ValueError(f"Requested {n_syn} synthetic > available {N}")
    idxs   = np.random.choice(N, size=n_syn, replace=False).tolist()
    syn_sub= Subset(full_syn, idxs)
    aug_ds = ConcatDataset([real_ds, syn_sub])
    loader = DataLoader(
        aug_ds, batch_size=batch_size, shuffle=True,
        num_workers=2, pin_memory=True
    )
    print(f"Augmented: real {len(real_ds)} + syn {n_syn} = {len(aug_ds)}")
    return loader

In [44]:
for n_syn in (200, 500):
    print(f"\n===== Augmented run with {n_syn} synthetic images =====")

    # Every run starts from the same saved initial weights and a new Adam.
    initial_state = torch.load('/content/initial_resnet18.pth', map_location=device)
    model.load_state_dict(initial_state)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Real data plus exactly n_syn synthetic images.
    loader = make_aug_loader(n_syn)

    # Training
    for epoch in range(1, num_epochs + 1):
        loss = train_one_epoch(loader)
        print(f"[Aug ({n_syn})] Epoch {epoch}/{num_epochs}  Loss: {loss:.4f}")

    # Evaluation on the held-out test set
    y_true_aug, y_pred_aug = evaluate(test_loader)
    aug_report = classification_report(
        y_true_aug, y_pred_aug, target_names=real_ds.classes
    )
    print(f"\nTest report (n_syn={n_syn}):")
    print(aug_report)



===== Augmented run with 200 synthetic images =====
Augmented: real 2000 + syn 200 = 2200
[Aug (200)] Epoch 1/5  Loss: 0.7158
[Aug (200)] Epoch 2/5  Loss: 0.2017
[Aug (200)] Epoch 3/5  Loss: 0.0618
[Aug (200)] Epoch 4/5  Loss: 0.0260
[Aug (200)] Epoch 5/5  Loss: 0.0159

Test report (n_syn=200):
                      precision    recall  f1-score   support

            melanoma       0.33      0.71      0.45       117
               nevus       0.79      0.56      0.66       393
seborrheic_keratosis       0.39      0.31      0.35        90

            accuracy                           0.55       600
           macro avg       0.50      0.53      0.48       600
        weighted avg       0.64      0.55      0.57       600


===== Augmented run with 500 synthetic images =====
Augmented: real 2000 + syn 500 = 2500
[Aug (500)] Epoch 1/5  Loss: 0.6299
[Aug (500)] Epoch 2/5  Loss: 0.1885
[Aug (500)] Epoch 3/5  Loss: 0.0661
[Aug (500)] Epoch 4/5  Loss: 0.0639
[Aug (500)] Epoch 5/5  Loss: 0.

In [45]:
# Persist the current classifier weights (the model left by the last run).
final_weights_path = BASE / 'final_resnet18_dcgan_aug.pth'
torch.save(model.state_dict(), str(final_weights_path))
print("Saved augmented model weights")

Saved augmented model weights
