# Segformer

Research Paper: https://arxiv.org/abs/2105.15203

Datasets: https://data.mendeley.com/datasets/8gf9vpkhgy/2

Implementation adapted from:
1. https://github.com/NVlabs/SegFormer
2. https://debuggercafe.com/road-segmentation-using-segformer/
3. https://www.kaggle.com/code/andrewkettle/pytorch-segformer-and-sam-on-kindey-1

In [1]:
import os
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from transformers import SegformerForSemanticSegmentation, SegformerConfig
import torch.optim as optim
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import torch.optim.lr_scheduler as lr_scheduler
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

torch.cuda.is_available()

  from .autonotebook import tqdm as notebook_tqdm


True

## Section 1: Datasets Processing

In [2]:
class Load_Datasets(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.images = os.listdir(image_dir)

    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, index):
        image_path = os.path.join(self.image_dir, self.images[index])
        mask_path = os.path.join(self.mask_dir, self.images[index])

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        if self.transform:
            transform = self.transform(image=image, mask=mask)
            image = transform['image']
            mask = transform['mask']

        image = image.float()/255.0
        mask = mask.long()
        return image, mask

In [3]:
batch_size=1

transform = A.Compose([
    A.Resize(128, 128),
    ToTensorV2()
], is_check_shapes=False)

### Part 1: Darwin Dataset

In [4]:
darwin_dataset = Load_Datasets(image_dir='./Datasets/Darwin/img', mask_dir='./Datasets/Darwin/mask', transform=transform)
train, test = train_test_split(darwin_dataset, test_size=0.2)

darwin_train = DataLoader(train, batch_size=batch_size, shuffle=True)
darwin_test = DataLoader(test, batch_size=batch_size, shuffle=True)

### Part 2: Shenzhen Dataset

In [5]:
shenzhen_dataset = Load_Datasets(image_dir='./Datasets/Shenzhen/img', mask_dir='./Datasets/Shenzhen/mask', transform=transform)
train, test = train_test_split(shenzhen_dataset, test_size=0.2)

shenzhen_train = DataLoader(train, batch_size=batch_size, shuffle=True)
shenzhen_test = DataLoader(test, batch_size=batch_size, shuffle=True)

### Part 3: Covid-19 Dataset

In [6]:
covid_dataset = Load_Datasets(image_dir='./Datasets/Covid-19/Covid/img', mask_dir='./Datasets/Covid-19/Covid/mask', transform=transform)
train, test = train_test_split(covid_dataset, test_size=0.2)

covid_train = DataLoader(train, batch_size=batch_size, shuffle=True)
covid_test = DataLoader(test, batch_size=batch_size, shuffle=True)

## Section 2: Model Implementation

In [7]:
def train_model(train_data):
    epochs = 5
    learning_rate = 0.01
    gradient_accumulation_steps = 2

    config = SegformerConfig(num_labels=1)
    model = SegformerForSemanticSegmentation.from_pretrained('nvidia/mit-b0', config=config)

    #device = 'cuda:0' if torch.cuda.is_available() else 'cpu' # Check for device
    #model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[20, 40], gamma=0.1)

    # Train network
    for ep in range(epochs):
        model.train()
        train_loss = []

        with torch.set_grad_enabled(True):
            for idx, (images, masks) in enumerate(tqdm(train_data)):
                # Convert vars to GPU
                images = images.float()
                masks = masks.type(torch.LongTensor)
                output = model(pixel_values=images, labels=masks)

                loss = output.loss
                loss = loss / gradient_accumulation_steps
                train_loss.append(loss)

                if (idx + 1) % gradient_accumulation_steps == 0:
                    optimizer.step()
                    scheduler.step()
                    optimizer.zero_grad()

        train_loss = loss.detach().numpy()
        print(f"Epoch [{ep+1}/{epochs}]. Training Loss [{np.mean(train_loss)}]")
        
        
    return model

### Part 1: Darwin Dataset

In [8]:
darwin_model = train_model(darwin_train)

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 4884/4884 [04:00<00:00, 20.27it/s]


Epoch [1/5]. Training Loss [0.2892589867115021]


100%|██████████| 4884/4884 [04:17<00:00, 18.98it/s]


Epoch [2/5]. Training Loss [0.2976530194282532]


100%|██████████| 4884/4884 [04:24<00:00, 18.44it/s]


Epoch [3/5]. Training Loss [0.26200711727142334]


100%|██████████| 4884/4884 [04:13<00:00, 19.28it/s]


Epoch [4/5]. Training Loss [0.31198883056640625]


100%|██████████| 4884/4884 [04:41<00:00, 17.37it/s]


Epoch [5/5]. Training Loss [0.23788407444953918]


### Part 2: Shenzhen Dataset

In [9]:
shenzhen_model = train_model(shenzhen_train)

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 452/452 [00:13<00:00, 33.70it/s]


Epoch [1/5]. Training Loss [0.26572200655937195]


100%|██████████| 452/452 [00:12<00:00, 34.87it/s]


Epoch [2/5]. Training Loss [0.25256550312042236]


100%|██████████| 452/452 [00:12<00:00, 35.35it/s]


Epoch [3/5]. Training Loss [0.27018022537231445]


100%|██████████| 452/452 [00:12<00:00, 35.49it/s]


Epoch [4/5]. Training Loss [0.25455427169799805]


100%|██████████| 452/452 [00:12<00:00, 36.18it/s]


Epoch [5/5]. Training Loss [0.2700246572494507]


### Part 3: Covid-19 Dataset

In [10]:
covid_model = train_model(covid_train)

Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 2892/2892 [01:29<00:00, 32.19it/s]


Epoch [1/5]. Training Loss [0.23130542039871216]


100%|██████████| 2892/2892 [01:23<00:00, 34.65it/s]


Epoch [2/5]. Training Loss [0.30832406878471375]


100%|██████████| 2892/2892 [01:24<00:00, 34.25it/s]


Epoch [3/5]. Training Loss [0.2860661745071411]


100%|██████████| 2892/2892 [01:21<00:00, 35.45it/s]


Epoch [4/5]. Training Loss [0.3316287398338318]


100%|██████████| 2892/2892 [01:27<00:00, 33.20it/s]


Epoch [5/5]. Training Loss [0.25208407640457153]


## Section 3: Model Evaluation

In [14]:
def evaluate_model(model, val_data):
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu' # Check for device
    model = model.to(device)

    model.eval()
    ious, accuracies, recalls, f1s = [], [], [], []
    
    with torch.set_grad_enabled(True):
        for images, masks in tqdm(val_data):
            images = images.float().to(device)
            masks = masks.type(torch.LongTensor).to(device)

            outputs = model(pixel_values=images)
            pred_masks = outputs.logits.argmax(dim=1)

            for pred_mask, true_mask in zip(pred_masks, masks):
                pred_mask_resized = F.interpolate(pred_mask.unsqueeze(0).unsqueeze(0).float(), size=true_mask.shape[-2:], mode='nearest').squeeze().cpu().numpy()
                true_mask_np = true_mask.cpu().numpy()

                iou = precision_score(true_mask_np.flatten(), pred_mask_resized.flatten(), average='micro')
                accuracy = accuracy_score(true_mask_np.flatten(), pred_mask_resized.flatten())
                recall = recall_score(true_mask_np.flatten(), pred_mask_resized.flatten(), average='micro')
                f1 = f1_score(true_mask_np.flatten(), pred_mask_resized.flatten(), average='micro')

                ious.append(iou)
                accuracies.append(accuracy)
                recalls.append(recall)
                f1s.append(f1)

    mean_iou = np.mean(ious)
    mean_accuracy = np.mean(accuracies)
    mean_recall = np.mean(recalls)
    mean_f1 = np.mean(f1s)

    return mean_iou, mean_accuracy, mean_recall, mean_f1

### Part 1: Darwin Dataset

In [15]:
mean_iou, mean_accuracy, mean_recall, mean_f1 = evaluate_model(darwin_model, darwin_test)

print(f"Validation Metrics: IoU: {mean_iou}, Accuracy: {mean_accuracy}, Recall: {mean_recall}, F1 Score: {mean_f1}")

100%|██████████| 1222/1222 [00:34<00:00, 35.35it/s]

Validation Metrics: IoU: 0.6706170364604644, Accuracy: 0.6706170364604644, Recall: 0.6706170364604644, F1 Score: 0.6706170364604644





### Part 2: Shenzhen Dataset

In [17]:
mean_iou, mean_accuracy, mean_recall, mean_f1 = evaluate_model(shenzhen_model, shenzhen_test)

print(f"Validation Metrics: IoU: {mean_iou}, Accuracy: {mean_accuracy}, Recall: {mean_recall}, F1 Score: {mean_f1}")

100%|██████████| 114/114 [00:03<00:00, 35.75it/s]

Validation Metrics: IoU: 0.7442428856565241, Accuracy: 0.7442428856565241, Recall: 0.7442428856565241, F1 Score: 0.7442428856565241





### Part 3: Covid-19 Dataset

In [18]:
mean_iou, mean_accuracy, mean_recall, mean_f1 = evaluate_model(covid_model, covid_test)

print(f"Validation Metrics: IoU: {mean_iou}, Accuracy: {mean_accuracy}, Recall: {mean_recall}, F1 Score: {mean_f1}")

100%|██████████| 724/724 [00:24<00:00, 29.96it/s]

Validation Metrics: IoU: 0.7588145219160048, Accuracy: 0.7588145219160048, Recall: 0.7588145219160048, F1 Score: 0.7588145219160048



