# Segformer

Research Paper: https://arxiv.org/abs/2105.15203

Datasets: https://data.mendeley.com/datasets/8gf9vpkhgy/2

Implementation adapted from:
1. https://github.com/NVlabs/SegFormer
2. https://debuggercafe.com/road-segmentation-using-segformer/
3. https://www.kaggle.com/code/andrewkettle/pytorch-segformer-and-sam-on-kindey-1

In [3]:
import os
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import albumentations as A
from albumentations.pytorch import ToTensorV2
from transformers import SegformerForSemanticSegmentation, SegformerConfig, Trainer, TrainingArguments
import torch.optim as optim
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch.optim.lr_scheduler as lr_scheduler
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import jaccard_score, accuracy_score, f1_score

KeyboardInterrupt: 

## Section 1: Dataset Processing

In [None]:
class Load_Datasets(Dataset):
    """Image/mask segmentation dataset backed by two directories.

    Every file name found in ``image_dir`` is expected to exist under
    ``mask_dir`` with the same name.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the masks (same file names as the images).
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``'image'`` and ``'mask'`` entries
            (the albumentations calling convention).

    Each item is a tuple ``(image, mask)``: ``image`` is a float tensor scaled
    by 1/255 and ``mask`` is a long tensor holding the raw grayscale mask values.
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sort so that an index
        # always refers to the same file across runs and machines.
        self.images = sorted(os.listdir(image_dir))

    def __len__(self):
        """Return the number of files listed in ``image_dir``."""
        return len(self.images)

    def __getitem__(self, index):
        """Load, transform and tensorize the image/mask pair at ``index``.

        Raises:
            FileNotFoundError: If the image or its mask cannot be read.
        """
        image_path = os.path.join(self.image_dir, self.images[index])
        mask_path = os.path.join(self.mask_dir, self.images[index])

        # NOTE(review): cv2 loads colour images as BGR; confirm whether the
        # pretrained encoder expects RGB for these (possibly grayscale) scans.
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread signals failure by returning None instead of raising,
        # which would otherwise surface later as an obscure error.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")

        if self.transform:
            transformed = self.transform(image=image, mask=mask)
            image = transformed['image']
            mask = transformed['mask']

        # Without a tensor-producing transform the samples are still numpy
        # arrays (H, W, C) / (H, W); convert them so the code below works.
        if not torch.is_tensor(image):
            image = torch.from_numpy(np.ascontiguousarray(image)).permute(2, 0, 1)
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(np.ascontiguousarray(mask))

        image = image.float()/255.0
        mask = mask.long()
        return image, mask

In [None]:
# Number of samples per mini-batch used by every DataLoader in this notebook.
batch_size = 4

# Shared preprocessing: resize image and mask to 256x256, then convert them to
# torch tensors. is_check_shapes=False is passed through to albumentations
# (presumably disabling its image/mask shape consistency check - confirm).
transform = A.Compose(
    [
        A.Resize(height=256, width=256),
        ToTensorV2(),
    ],
    is_check_shapes=False,
)

### Part 1: Darwin Dataset

In [None]:
darwin_dataset = Load_Datasets(image_dir='./Datasets/Darwin/img', mask_dir='./Datasets/Darwin/mask', transform=transform)

# Split sample indices instead of the dataset object: handing the Dataset to
# train_test_split indexes every sample up front, which decodes the whole
# dataset into memory. random_state pins the split so runs are reproducible.
train_idx, test_idx = train_test_split(np.arange(len(darwin_dataset)), test_size=0.1, random_state=42)
train = torch.utils.data.Subset(darwin_dataset, train_idx.tolist())
test = torch.utils.data.Subset(darwin_dataset, test_idx.tolist())

darwin_train = DataLoader(train, batch_size=batch_size, shuffle=True)
# Held-out data does not need shuffling; a fixed order keeps evaluation deterministic.
darwin_test = DataLoader(test, batch_size=batch_size, shuffle=False)

### Part 2: Shenzhen Dataset

In [None]:
shenzhen_dataset = Load_Datasets(image_dir='./Datasets/Shenzhen/img', mask_dir='./Datasets/Shenzhen/mask', transform=transform)

# Split sample indices instead of the dataset object: handing the Dataset to
# train_test_split indexes every sample up front, which decodes the whole
# dataset into memory. random_state pins the split so runs are reproducible.
train_idx, test_idx = train_test_split(np.arange(len(shenzhen_dataset)), test_size=0.1, random_state=42)
train = torch.utils.data.Subset(shenzhen_dataset, train_idx.tolist())
test = torch.utils.data.Subset(shenzhen_dataset, test_idx.tolist())

shenzhen_train = DataLoader(train, batch_size=batch_size, shuffle=True)
# Held-out data does not need shuffling; a fixed order keeps evaluation deterministic.
shenzhen_test = DataLoader(test, batch_size=batch_size, shuffle=False)

### Part 3: Covid-19 Dataset

In [None]:
covid_dataset = Load_Datasets(image_dir='./Datasets/Covid-19/Covid/img', mask_dir='./Datasets/Covid-19/Covid/mask', transform=transform)

# Split sample indices instead of the dataset object: handing the Dataset to
# train_test_split indexes every sample up front, which decodes the whole
# dataset into memory. random_state pins the split so runs are reproducible.
train_idx, test_idx = train_test_split(np.arange(len(covid_dataset)), test_size=0.1, random_state=42)
train = torch.utils.data.Subset(covid_dataset, train_idx.tolist())
test = torch.utils.data.Subset(covid_dataset, test_idx.tolist())

covid_train = DataLoader(train, batch_size=batch_size, shuffle=True)
# Held-out data does not need shuffling; a fixed order keeps evaluation deterministic.
covid_test = DataLoader(test, batch_size=batch_size, shuffle=False)

## Section 2: Model Implementation

In [None]:
def train_model(train_data, val_data):
    """Fine-tune a single-logit SegFormer (MiT-b0 encoder) for binary segmentation.

    Args:
        train_data: Iterable yielding ``(images, masks)`` batches used for
            gradient updates.
        val_data: Iterable yielding ``(images, masks)`` batches used only to
            report a validation loss after each epoch. May be ``None`` to
            skip validation.

    Returns:
        The trained ``SegformerForSemanticSegmentation`` model, left on the
        device it was trained on.
    """
    epochs = 10
    learning_rate = 0.0025

    # NOTE(review): some transformers releases reject num_labels=1 when labels
    # are passed to forward(); confirm the installed version supports it.
    config = SegformerConfig(num_labels=1)
    model = SegformerForSemanticSegmentation.from_pretrained('nvidia/mit-b0', config=config)

    device = 'cuda:0' if torch.cuda.is_available() else 'cpu' # Check for device
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # With epochs=10 the milestones (20, 40) are never reached, so the learning
    # rate stays constant; kept so that raising `epochs` enables the decay.
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[20, 40], gamma=0.1)

    def _to_device(images, masks):
        # The model has a single output logit, so targets must be 0/1.
        # Binarizing is a no-op for 0/1 masks and maps 0/255 masks to 0/1
        # (assumes any non-zero pixel is foreground - verify against the data).
        return images.float().to(device), (masks > 0).long().to(device)

    # Train network
    for ep in range(epochs):
        model.train()
        train_loss = []

        # Training
        for images, masks in tqdm(train_data):
            images, masks = _to_device(images, masks)
            output = model(pixel_values=images, labels=masks)
            loss = output.loss

            optimizer.zero_grad()
            # Without backward() no gradients exist and optimizer.step() is a no-op.
            loss.backward()
            optimizer.step()

            # Store a Python float: keeping the tensor would retain the whole
            # autograd graph and cannot be averaged by numpy on a CUDA device.
            train_loss.append(loss.item())

        mean_train = np.mean(train_loss) if train_loss else float('nan')
        print(f"Epoch [{ep+1}/{epochs}]. Training Loss [{mean_train}]")

        # Validation (loss only, no parameter updates)
        if val_data is not None:
            model.eval()
            val_loss = []
            with torch.no_grad():
                for images, masks in tqdm(val_data):
                    images, masks = _to_device(images, masks)
                    val_loss.append(model(pixel_values=images, labels=masks).loss.item())
            mean_val = np.mean(val_loss) if val_loss else float('nan')
            print(f"Epoch [{ep+1}/{epochs}]. Validation Loss [{mean_val}]")

        scheduler.step()

    return model

### Part 1: Darwin Dataset

In [None]:
# Train on the Darwin training loader; darwin_test is passed as the val_data argument.
darwin_model = train_model(darwin_train, darwin_test)

NameError: name 'darwin_train' is not defined

### Part 2: Shenzhen Dataset

In [None]:
# Train on the Shenzhen training loader; shenzhen_test is passed as the val_data argument.
shenzhen_model = train_model(shenzhen_train, shenzhen_test)

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch [1/10]


100%|██████████| 128/128 [00:08<00:00, 15.65it/s]
100%|██████████| 15/15 [00:00<00:00, 15.79it/s]


Epoch [2/10]


100%|██████████| 128/128 [00:07<00:00, 16.05it/s]
100%|██████████| 15/15 [00:00<00:00, 16.48it/s]


Epoch [3/10]


100%|██████████| 128/128 [00:07<00:00, 16.14it/s]
100%|██████████| 15/15 [00:00<00:00, 16.46it/s]


Epoch [4/10]


100%|██████████| 128/128 [00:07<00:00, 16.15it/s]
100%|██████████| 15/15 [00:00<00:00, 16.52it/s]


Epoch [5/10]


100%|██████████| 128/128 [00:08<00:00, 16.00it/s]
100%|██████████| 15/15 [00:00<00:00, 16.72it/s]


Epoch [6/10]


100%|██████████| 128/128 [00:07<00:00, 16.08it/s]
100%|██████████| 15/15 [00:00<00:00, 16.67it/s]


Epoch [7/10]


100%|██████████| 128/128 [00:08<00:00, 16.00it/s]
100%|██████████| 15/15 [00:00<00:00, 16.72it/s]


Epoch [8/10]


100%|██████████| 128/128 [00:07<00:00, 16.12it/s]
100%|██████████| 15/15 [00:00<00:00, 16.71it/s]


Epoch [9/10]


100%|██████████| 128/128 [00:07<00:00, 16.06it/s]
100%|██████████| 15/15 [00:00<00:00, 15.92it/s]


Epoch [10/10]


100%|██████████| 128/128 [00:07<00:00, 16.12it/s]
100%|██████████| 15/15 [00:00<00:00, 16.41it/s]


### Part 3: Covid-19 Dataset

In [None]:
# Train on the Covid-19 training loader; covid_test is passed as the val_data argument.
covid_model = train_model(covid_train, covid_test)

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch [1/10]


100%|██████████| 814/814 [00:51<00:00, 15.87it/s]
100%|██████████| 91/91 [00:05<00:00, 16.18it/s]


Epoch [2/10]


100%|██████████| 814/814 [00:51<00:00, 15.77it/s]
100%|██████████| 91/91 [00:05<00:00, 15.52it/s]


Epoch [3/10]


100%|██████████| 814/814 [00:53<00:00, 15.20it/s]
100%|██████████| 91/91 [00:06<00:00, 14.81it/s]


Epoch [4/10]


100%|██████████| 814/814 [00:54<00:00, 14.83it/s]
100%|██████████| 91/91 [00:05<00:00, 15.24it/s]


Epoch [5/10]


100%|██████████| 814/814 [00:54<00:00, 14.83it/s]
100%|██████████| 91/91 [00:05<00:00, 15.47it/s]


Epoch [6/10]


100%|██████████| 814/814 [00:55<00:00, 14.73it/s]
100%|██████████| 91/91 [00:06<00:00, 15.05it/s]


Epoch [7/10]


100%|██████████| 814/814 [00:53<00:00, 15.11it/s]
100%|██████████| 91/91 [00:06<00:00, 15.12it/s]


Epoch [8/10]


100%|██████████| 814/814 [00:53<00:00, 15.15it/s]
100%|██████████| 91/91 [00:06<00:00, 15.13it/s]


Epoch [9/10]


100%|██████████| 814/814 [00:52<00:00, 15.38it/s]
100%|██████████| 91/91 [00:05<00:00, 15.77it/s]


Epoch [10/10]


100%|██████████| 814/814 [00:52<00:00, 15.60it/s]
100%|██████████| 91/91 [00:05<00:00, 15.78it/s]


## Section 3: Model Evaluation

In [None]:
def evaluate_model(model, test_data):
    """Run ``model`` over ``test_data`` and print segmentation metrics.

    Args:
        model: Segmentation model whose forward pass accepts ``pixel_values``
            and returns an object with a ``logits`` attribute of shape
            ``(batch, channels, h, w)``.
        test_data: Iterable yielding ``(images, masks)`` batches.

    Prints the dictionary produced by ``compute_metrics``; returns ``None``.
    """
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)
    model.eval()

    all_preds = []
    all_labels = []

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for images, masks in tqdm(test_data):
            # Convert vars to GPU
            images = images.float().to(device)
            masks = masks.long().to(device)

            logits = model(pixel_values=images).logits
            # SegFormer emits logits at reduced resolution; upsample them to
            # the mask size so predictions and labels align pixel for pixel.
            logits = F.interpolate(logits, size=masks.shape[-2:], mode='bilinear', align_corners=False)

            if logits.shape[1] == 1:
                # argmax over a single channel is always 0; threshold the
                # logit at 0 instead (equivalent to sigmoid > 0.5).
                preds = (logits[:, 0] > 0).long()
                # Match the 0/1 prediction encoding (assumes non-zero mask
                # pixels are foreground - verify against the data).
                labels = (masks > 0).long()
            else:
                preds = torch.argmax(logits, dim=1)
                labels = masks

            # Flatten per batch: squeeze() would drop the batch axis of a
            # size-1 final batch and break the concatenation below.
            all_preds.append(preds.reshape(-1).cpu().numpy())
            all_labels.append(labels.reshape(-1).cpu().numpy())

    all_preds = np.concatenate(all_preds, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    metrics = compute_metrics(all_preds, all_labels)
    print(metrics)

def compute_metrics(pred, true):
    """Score predictions against ground truth on a per-pixel basis.

    Args:
        pred: Array of predicted labels (any shape; flattened internally).
        true: Array of ground-truth labels with the same number of elements.

    Returns:
        dict with keys ``'IoU'`` (macro-averaged Jaccard score),
        ``'Pixel Accuracy'`` and ``'Dice Coefficient'`` (macro-averaged F1).
    """
    y_pred = pred.flatten()
    y_true = true.flatten()

    return {
        'IoU': jaccard_score(y_true, y_pred, average='macro'),
        'Pixel Accuracy': accuracy_score(y_true, y_pred),
        'Dice Coefficient': f1_score(y_true, y_pred, average='macro'),
    }

### Part 1: Darwin Dataset

In [None]:
# Print metrics for the Darwin model on its held-out loader.
evaluate_model(darwin_model, darwin_test)

### Part 2: Shenzhen Dataset

In [None]:
# Print metrics for the Shenzhen model on its held-out loader.
evaluate_model(shenzhen_model, shenzhen_test)

### Part 3: Covid-19 Dataset

In [None]:
# Print metrics for the Covid-19 model on its held-out loader.
evaluate_model(covid_model, covid_test)