
# 🏁 SegFormer Fine-tuning for Semantic Segmentation

In this notebook, we demonstrate how to fine-tune **SegFormer**, a Vision Transformer-based semantic segmentation model, using PyTorch and the Hugging Face `transformers`, `datasets`, and `evaluate` libraries.

We'll:
- Load an image segmentation dataset from Hugging Face (`datasets`)
- Preprocess using `SegformerImageProcessor`
- Train a SegFormer model
- Evaluate using mean IoU


In [None]:

!pip install -q transformers datasets evaluate torch torchvision timm


In [None]:

import torch
from torch.utils.data import Dataset, DataLoader
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
from transformers import AdamW, get_scheduler
from datasets import load_dataset
from PIL import Image
import numpy as np
import evaluate


In [None]:

class SegmentationDataset(Dataset):
    """Torch dataset that turns image/mask records into per-sample model inputs.

    Each record of ``hf_ds`` is fetched by integer index and must behave like a
    mapping that holds an image and a segmentation mask. Both are handed to
    ``processor`` and the resulting tensors are returned without their leading
    batch dimension so a ``DataLoader`` can stack them.

    Args:
        hf_ds: Indexable collection of mapping-like records that supports ``len``.
        processor: Callable invoked as ``processor(image, mask, return_tensors="pt")``
            that returns a mapping of tensors whose first dimension is the batch (size 1).
        image_key: Record key that holds the image.
        mask_key: Record key that holds the mask. When a record lacks this key,
            the names in ``FALLBACK_MASK_KEYS`` are tried in order.
    """

    # "annotation" is the mask column name used by the scene_parse_150 dataset.
    FALLBACK_MASK_KEYS = ("annotation", "mask")

    def __init__(self, hf_ds, processor, image_key="image", mask_key="segmentation_mask"):
        self.ds = hf_ds
        self.processor = processor
        self.image_key = image_key
        self.mask_key = mask_key

    def __len__(self):
        """Return the number of records in the wrapped dataset."""
        return len(self.ds)

    @staticmethod
    def _to_pil(value):
        """Return ``value`` as a PIL image, converting only when it is not one already."""
        if isinstance(value, Image.Image):
            # Already decoded: avoid a needless round-trip through an array.
            return value
        return Image.fromarray(value)

    def _resolve_mask(self, item):
        """Return the mask stored in ``item``; raise ``KeyError`` if no known key is present."""
        if self.mask_key in item:
            return item[self.mask_key]
        for key in self.FALLBACK_MASK_KEYS:
            if key in item:
                return item[key]
        raise KeyError(
            f"No segmentation mask found: tried {self.mask_key!r} and {self.FALLBACK_MASK_KEYS}"
        )

    def __getitem__(self, idx):
        """Return the processor's tensors for record ``idx`` with the batch dim removed.

        Raises:
            KeyError: If the record has no image key or no recognizable mask key.
        """
        item = self.ds[idx]
        # Force three channels so grayscale/RGBA inputs do not break normalization.
        image = self._to_pil(item[self.image_key]).convert("RGB")
        mask = self._to_pil(self._resolve_mask(item))
        encoded = self.processor(image, mask, return_tensors="pt")
        # squeeze(0) removes only the batch dim; a bare squeeze() would also
        # drop any other dimension that happens to have size 1.
        return {k: v.squeeze(0) for k, v in encoded.items()}


In [None]:

# Swap 'scene_parse_150' for your own dataset name if you are not using it.
dataset = load_dataset("scene_parse_150")

# Demo-sized subsets: 100 shuffled training records and the first 50 validation records.
train_split = dataset["train"]
train_ds = train_split.shuffle(seed=42).select(range(100))
val_split = dataset["validation"]
val_ds = val_split.select(range(50))


In [None]:

# The image processor and the model weights are loaded from the same checkpoint name.
checkpoint = "nvidia/segformer-b0-finetuned-ade-512-512"
processor = SegformerImageProcessor.from_pretrained(checkpoint)
model = SegformerForSemanticSegmentation.from_pretrained(checkpoint)
# train() returns the model; the trailing semicolon keeps the notebook from
# echoing the full module repr as this cell's output.
model.train();


In [None]:

# Only the training loader shuffles. The seeded generator makes the shuffle
# order identical after every Restart & Run All.
train_loader = DataLoader(
    SegmentationDataset(train_ds, processor),
    batch_size=4,
    shuffle=True,
    generator=torch.Generator().manual_seed(42),
)
val_loader = DataLoader(SegmentationDataset(val_ds, processor), batch_size=2)


In [None]:

# Use PyTorch's AdamW: the implementation exported by transformers is deprecated upstream.
# weight_decay=0.0 keeps decay disabled, matching the default of the optimizer it replaces.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, weight_decay=0.0)
num_epochs = 5
# The scheduler is stepped once per training batch, so the schedule spans every batch of every epoch.
num_steps = num_epochs * len(train_loader)
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_steps)


In [None]:

metric = evaluate.load("mean_iou")
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# to() returns the model; the trailing semicolon suppresses its repr in the cell output.
model.to(device);


In [None]:

# Fine-tune for `num_epochs` epochs, reporting validation mIoU after each one.
for epoch in range(num_epochs):
    model.train()
    for batch in train_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        # Labels are part of the batch, so the model returns the loss directly.
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    # Validation
    # No explicit metric reset: compute() below consumes every batch added
    # since the previous call, so each epoch starts from an empty state.
    model.eval()
    for batch in val_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
            # SegFormer emits logits at a quarter of the input resolution;
            # upsample to the label size so predictions and references align.
            logits = torch.nn.functional.interpolate(
                outputs.logits,
                size=batch["labels"].shape[-2:],
                mode="bilinear",
                align_corners=False,
            )
        preds = logits.argmax(dim=1).cpu().numpy()
        labels = batch["labels"].cpu().numpy()
        metric.add_batch(predictions=preds, references=labels)

    # Pixels labelled 255 are excluded from the IoU computation.
    results = metric.compute(num_labels=model.config.num_labels, ignore_index=255, reduce_labels=False)
    print(f"Epoch {epoch+1}/{num_epochs} — mIoU: {results['mean_iou']:.4f}")
