# Segformer

Research Paper: https://arxiv.org/abs/2105.15203

Datasets: https://data.mendeley.com/datasets/8gf9vpkhgy/2

Implementation adapted from:
1. https://github.com/NVlabs/SegFormer
2. https://debuggercafe.com/road-segmentation-using-segformer/
3. https://www.kaggle.com/code/andrewkettle/pytorch-segformer-and-sam-on-kindey-1
4. https://medium.com/geekculture/semantic-segmentation-with-segformer-2501543d2be4

In [118]:
import os
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from transformers import SegformerForSemanticSegmentation, SegformerConfig, SegformerFeatureExtractor, SegformerModel
import torch.optim as optim
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch.optim.lr_scheduler as lr_scheduler
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, jaccard_score
from tabulate import tabulate

## Section 1: Datasets Processing

In [119]:
class Load_Datasets(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.images = os.listdir(image_dir)

    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, index):
        image_path = os.path.join(self.image_dir, self.images[index])
        mask_path = os.path.join(self.mask_dir, self.images[index])

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        if self.transform:
            transform = self.transform(image=image, mask=mask)
            image = transform['image']
            mask = transform['mask']

        image = image.float()/255.0
        mask = mask.long()
        return image, mask

In [120]:
batch_size=4

transform = A.Compose([
    A.Resize(128, 128),
    ToTensorV2()
], is_check_shapes=False)

### Part 1: Darwin Dataset

In [121]:
darwin_dataset = Load_Datasets(image_dir='./Datasets/Darwin/img', mask_dir='./Datasets/Darwin/mask', transform=transform)
train, test = train_test_split(darwin_dataset, test_size=0.2)

darwin_train = DataLoader(train, batch_size=batch_size, shuffle=True)
darwin_test = DataLoader(test, batch_size=batch_size, shuffle=True)

### Part 2: Shenzhen Dataset

In [122]:
shenzhen_dataset = Load_Datasets(image_dir='./Datasets/Shenzhen/img', mask_dir='./Datasets/Shenzhen/mask', transform=transform)
train, test = train_test_split(shenzhen_dataset, test_size=0.2)

shenzhen_train = DataLoader(train, batch_size=batch_size, shuffle=True)
shenzhen_test = DataLoader(test, batch_size=batch_size, shuffle=True)

### Part 3: Covid-19 Dataset

In [123]:
covid_dataset = Load_Datasets(image_dir='./Datasets/Covid-19/Covid/img', mask_dir='./Datasets/Covid-19/Covid/mask', transform=transform)
train, test = train_test_split(covid_dataset, test_size=0.2)

covid_train = DataLoader(train, batch_size=batch_size, shuffle=True)
covid_test = DataLoader(test, batch_size=batch_size, shuffle=True)

## Section 2: Model Implementation

In [124]:
def train_model(train_data):
    epochs = 10
    learning_rate = 0.0025

    config = SegformerConfig(num_labels=1)
    model = SegformerForSemanticSegmentation.from_pretrained('nvidia/mit-b0', config=config)
    model.train()

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    average_loss = []

    # Train network
    for ep in range(epochs):
        model_loss = []

        for images, masks in tqdm(train_data):
            images = images.float()
            masks = masks.type(torch.LongTensor)
            outputs = model(pixel_values=images, labels=masks)
        
            loss = outputs.loss
            model_loss.append(loss.item())
            optimizer.step()
            optimizer.zero_grad()

        model_loss = loss.detach().numpy()
        average_loss.append(np.mean(model_loss))
        print(f"Epoch [{ep+1}/{epochs}]. Training Loss [{np.mean(model_loss)}]")
    
    train_loss = np.mean(average_loss)
    return model, train_loss

### Part 1: Darwin Dataset

In [125]:
darwin_model, darwin_loss = train_model(darwin_train)

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 1221/1221 [00:34<00:00, 35.40it/s]


Epoch [1/10]. Training Loss [0.5331524610519409]


100%|██████████| 1221/1221 [00:34<00:00, 35.12it/s]


Epoch [2/10]. Training Loss [0.537145733833313]


100%|██████████| 1221/1221 [00:34<00:00, 35.12it/s]


Epoch [3/10]. Training Loss [0.5325087308883667]


100%|██████████| 1221/1221 [00:34<00:00, 35.44it/s]


Epoch [4/10]. Training Loss [0.5162166357040405]


100%|██████████| 1221/1221 [00:34<00:00, 35.55it/s]


Epoch [5/10]. Training Loss [0.5132641792297363]


100%|██████████| 1221/1221 [00:34<00:00, 35.43it/s]


Epoch [6/10]. Training Loss [0.5175669193267822]


100%|██████████| 1221/1221 [00:34<00:00, 35.50it/s]


Epoch [7/10]. Training Loss [0.4609473943710327]


100%|██████████| 1221/1221 [00:34<00:00, 35.49it/s]


Epoch [8/10]. Training Loss [0.5346200466156006]


100%|██████████| 1221/1221 [00:34<00:00, 35.47it/s]


Epoch [9/10]. Training Loss [0.4969872832298279]


100%|██████████| 1221/1221 [00:34<00:00, 35.37it/s]

Epoch [10/10]. Training Loss [0.4747421443462372]





### Part 2: Shenzhen Dataset

In [126]:
shenzhen_model, shenzhen_loss = train_model(shenzhen_train)

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 113/113 [00:03<00:00, 33.64it/s]


Epoch [1/10]. Training Loss [0.5412898063659668]


100%|██████████| 113/113 [00:03<00:00, 34.50it/s]


Epoch [2/10]. Training Loss [0.5053495168685913]


100%|██████████| 113/113 [00:03<00:00, 34.45it/s]


Epoch [3/10]. Training Loss [0.5360866785049438]


100%|██████████| 113/113 [00:03<00:00, 34.48it/s]


Epoch [4/10]. Training Loss [0.533100962638855]


100%|██████████| 113/113 [00:03<00:00, 34.43it/s]


Epoch [5/10]. Training Loss [0.5149660110473633]


100%|██████████| 113/113 [00:03<00:00, 34.68it/s]


Epoch [6/10]. Training Loss [0.5329985618591309]


100%|██████████| 113/113 [00:03<00:00, 34.18it/s]


Epoch [7/10]. Training Loss [0.4946966767311096]


100%|██████████| 113/113 [00:03<00:00, 34.17it/s]


Epoch [8/10]. Training Loss [0.5156334638595581]


100%|██████████| 113/113 [00:03<00:00, 34.30it/s]


Epoch [9/10]. Training Loss [0.5191201567649841]


100%|██████████| 113/113 [00:03<00:00, 34.39it/s]

Epoch [10/10]. Training Loss [0.5418694019317627]





### Part 3: Covid-19 Dataset

In [127]:
covid_model, covid_loss = train_model(covid_train)

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 723/723 [00:20<00:00, 34.56it/s]


Epoch [1/10]. Training Loss [0.5102677941322327]


100%|██████████| 723/723 [00:20<00:00, 35.39it/s]


Epoch [2/10]. Training Loss [0.5085970163345337]


100%|██████████| 723/723 [00:20<00:00, 35.38it/s]


Epoch [3/10]. Training Loss [0.5112752914428711]


100%|██████████| 723/723 [00:20<00:00, 35.46it/s]


Epoch [4/10]. Training Loss [0.5343801379203796]


100%|██████████| 723/723 [00:20<00:00, 35.42it/s]


Epoch [5/10]. Training Loss [0.550214946269989]


100%|██████████| 723/723 [00:20<00:00, 35.37it/s]


Epoch [6/10]. Training Loss [0.5354107618331909]


100%|██████████| 723/723 [00:20<00:00, 35.45it/s]


Epoch [7/10]. Training Loss [0.4743126630783081]


100%|██████████| 723/723 [00:20<00:00, 35.39it/s]


Epoch [8/10]. Training Loss [0.4846624433994293]


100%|██████████| 723/723 [00:20<00:00, 35.41it/s]


Epoch [9/10]. Training Loss [0.5256443023681641]


100%|██████████| 723/723 [00:20<00:00, 35.51it/s]

Epoch [10/10]. Training Loss [0.5017141103744507]





## Section 3: Model Evaluation

In [128]:
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

def evaluate_model(model, val_data):
    model.eval()
    ious, accuracies, recalls, f1s = [], [], [], []
    
    for images, masks in tqdm(val_data):
        images = images.float()
        masks = masks.type(torch.LongTensor)

        outputs = model(pixel_values=images, labels=masks)
        logits = F.interpolate(outputs.logits, size=masks.shape[-2:], mode="bilinear", align_corners=False)
        prediction = logits.argmax(dim=1)

        for pred, true in zip(prediction, masks):
            pred_mask = pred.cpu().numpy()
            true_mask = true.cpu().numpy()

            iou = jaccard_score(true_mask.flatten(), pred_mask.flatten(), average='weighted')
            accuracy = accuracy_score(true_mask.flatten(), pred_mask.flatten())
            recall = recall_score(true_mask.flatten(), pred_mask.flatten(), average='weighted')
            f1 = f1_score(true_mask.flatten(), pred_mask.flatten(), average='weighted')

            ious.append(iou)
            accuracies.append(accuracy)
            recalls.append(recall)
            f1s.append(f1)

    mean_iou = np.mean(ious)
    mean_accuracy = np.mean(accuracies)
    mean_recall = np.mean(recalls)
    mean_f1 = np.mean(f1s)

    return mean_iou, mean_accuracy, mean_recall, mean_f1

### Part 1: Darwin Dataset

In [129]:
darwin_iou, darwin_accuracy, darwin_recall, darwin_f1 = evaluate_model(darwin_model, darwin_test)

print(f"Validation Metrics: IoU: {darwin_iou}, Accuracy: {darwin_accuracy}, Recall: {darwin_recall}, F1 Score: {darwin_f1}")

100%|██████████| 306/306 [00:19<00:00, 15.75it/s]

Validation Metrics: IoU: 0.4568388419783691, Accuracy: 0.6718928310562858, Recall: 0.6718928310562858, F1 Score: 0.5422873118398244





### Part 2: Shenzhen Dataset

In [130]:
shenzhen_iou, shenzhen_accuracy, shenzhen_recall, shenzhen_f1 = evaluate_model(shenzhen_model, shenzhen_test)

print(f"Validation Metrics: IoU: {shenzhen_iou}, Accuracy: {shenzhen_accuracy}, Recall: {shenzhen_recall}, F1 Score: {shenzhen_f1}")

100%|██████████| 29/29 [00:01<00:00, 16.45it/s]

Validation Metrics: IoU: 0.5555349294292299, Accuracy: 0.7429841694078947, Recall: 0.7429841694078947, F1 Score: 0.6347583931046619





### Part 3: Covid-19 Dataset

In [131]:
covid_iou, covid_accuracy, covid_recall, covid_f1 = evaluate_model(covid_model, covid_test)

print(f"Validation Metrics: IoU: {covid_iou}, Accuracy: {covid_accuracy}, Recall: {covid_recall}, F1 Score: {covid_f1}")

100%|██████████| 181/181 [00:12<00:00, 14.37it/s]

Validation Metrics: IoU: 0.5774161963677291, Accuracy: 0.7566240848098671, Recall: 0.7566240848098671, F1 Score: 0.6536815465378706





### Part 4: Conclusion

In [132]:
results_table = [
    ["Darwin", darwin_loss, darwin_iou, darwin_accuracy, darwin_recall, darwin_f1],
    ["Zhenshen", shenzhen_loss, shenzhen_iou, shenzhen_accuracy, shenzhen_recall, shenzhen_f1],
    ["Covid-19", covid_loss, covid_iou, covid_accuracy, covid_recall, covid_f1]
]

head = ["Datasets", "Average Training Loss", "IoU Score", " Accuracy Score", "Recall Score", "F-1 score"]

print(tabulate(results_table, headers=head, tablefmt="grid"))

+------------+-------------------------+-------------+-------------------+----------------+-------------+
| Datasets   |   Average Training Loss |   IoU Score |    Accuracy Score |   Recall Score |   F-1 score |
| Darwin     |                0.511715 |    0.456839 |          0.671893 |       0.671893 |    0.542287 |
+------------+-------------------------+-------------+-------------------+----------------+-------------+
| Zhenshen   |                0.523511 |    0.555535 |          0.742984 |       0.742984 |    0.634758 |
+------------+-------------------------+-------------+-------------------+----------------+-------------+
| Covid-19   |                0.513648 |    0.577416 |          0.756624 |       0.756624 |    0.653682 |
+------------+-------------------------+-------------+-------------------+----------------+-------------+
