In [None]:
!pip install datasets transformers albumentations huggingface_hub

In [None]:
# NEW ALGO #

"""
To deal with inconsistently labelled data:
1. train a model on the original data;
2. take the segmentation maps predicted by that model
   and combine them with the original maps using a pixel-wise logical OR;
3. re-train on the combined maps and check the results.

"""

In [None]:
from datasets import load_dataset

# Hub repository the dataset is pulled from.
DATASET_REPO_ID = "jaygala223/38-cloud-train-only-v1"
dataset = load_dataset(DATASET_REPO_ID)

In [None]:
# Show the repr of the loaded dataset object.
dataset

In [None]:
# # exclude bad labels

# bad_labels = [35, 36]

# dataset['train'] = dataset['train'].select(
#     (
#         i for i in range(dataset['train'].num_rows)
#         if i not in set(bad_labels)
#     )
# )

In [None]:
# Display the 'image' field of training sample 2500.
dataset['train'][2500]['image']

In [None]:
# Display the 'label' field of the same training sample (index 2500).
dataset['train'][2500]['label']

In [None]:
# shuffle + split
# Both steps are seeded so that train/test membership is identical on every run
# (train_test_split was previously called without a seed).
# `dataset` itself is left untouched, so re-running this cell does not split an
# already-split dataset a second time.
SPLIT_SEED = 1
shuffled_dataset = dataset.shuffle(seed=SPLIT_SEED)
dataset_splits = shuffled_dataset["train"].train_test_split(test_size=0.0075, seed=SPLIT_SEED)
train_ds = dataset_splits["train"]
test_ds = dataset_splits["test"]

In [None]:
# Print the summaries of the train and test splits.
print(train_ds)
print(test_ds)

In [None]:
# Pull one training sample (index 39) and display its image.
# `example` and `image` are reused by the cells that follow.
example = train_ds[39]
image = example['image']
image

In [None]:
import numpy as np
# Shape of the example image once converted to a NumPy array.
np.array(image).shape

In [None]:
import numpy as np

# Ground-truth segmentation map of the example: one label per pixel.
# The stored values are divided by 255 and truncated to uint8 in a single expression.
segmentation_map = (np.array(example['label']) / 255).astype(np.uint8)
segmentation_map

In [None]:
# Distinct class ids present in the example's segmentation map.
np.unique(segmentation_map)

In [None]:
from huggingface_hub import hf_hub_download
import json

# Download the id -> label mapping stored in the dataset repository.
repo_id = "jaygala223/38-cloud-train-only-v1"
filename = "id2label.json"
id2label_path = hf_hub_download(repo_id, filename, repo_type="dataset")
# Use a context manager so the file handle is closed as soon as parsing is done.
with open(id2label_path, "r") as id2label_file:
    id2label = json.load(id2label_file)
# JSON object keys are strings; convert them to integer class ids.
id2label = {int(k): v for k, v in id2label.items()}
print(id2label)

In [None]:
def color_palette():
    """Return the colour used for each class, indexed by class id.

    Only two classes exist, so the palette has two entries
    (per the original note, the colours are taken from ADE20k).

    Returns:
        A new list of two ``[R, G, B]`` lists on every call.
    """
    class_zero_rgb = [102, 255, 0]
    class_one_rgb = [92, 0, 255]
    return [class_zero_rgb, class_one_rgb]

# Module-level palette used by the visualisation cell below.
palette = color_palette()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Paint every pixel with the palette colour of its class id.
map_height, map_width = segmentation_map.shape[0], segmentation_map.shape[1]
color_segmentation_map = np.zeros((map_height, map_width, 3), dtype=np.uint8)  # height, width, 3
for label, color in enumerate(palette):
    class_pixels = segmentation_map == label
    color_segmentation_map[class_pixels, :] = color
# Reverse the channel axis (the original note calls this "Convert to BGR").
ground_truth_color_seg = color_segmentation_map[..., ::-1]

# 50/50 blend of the example image and the coloured mask, truncated back to uint8.
img = (np.array(image) * 0.5 + ground_truth_color_seg * 0.5).astype(np.uint8)

plt.figure(figsize=(15, 10))
plt.imshow(img)
plt.show()

In [None]:
import numpy as np
from torch.utils.data import Dataset

class ImageSegmentationDataset(Dataset):
    """Image segmentation dataset.

    Wraps an indexable collection of samples, each exposing an ``'image'`` and a
    ``'label'`` entry, and applies a transform to every sample on access.
    """

    def __init__(self, dataset, transform):
        """
        Args:
            dataset: indexable collection whose items provide ``'image'`` and ``'label'``.
            transform: callable invoked as ``transform(image=..., mask=...)`` that
                returns a mapping with ``'image'`` and ``'mask'`` keys.
        """
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        """Number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(image, segmentation_map, original_image, original_segmentation_map)``.

        ``image`` is the transformed image in (C, H, W) order and ``segmentation_map``
        the transformed mask; the two ``original_*`` arrays are the untransformed
        inputs (the mask possibly with its bottom-right pixel set to 255, see below).
        """
        # Fetch the sample once instead of indexing the dataset separately for each field.
        sample = self.dataset[idx]
        original_image = np.array(sample['image'])
        original_segmentation_map = np.array(sample['label'])

        # adding one bottom most pixel as 255 since processor/feature_extractor
        # wont take labels without a positive (i.e. class: 1 or cloud)
        # `.any()` tests "is every pixel zero" directly; summing the unique uint8
        # values could wrap around and misreport a non-empty mask as empty.
        if not original_segmentation_map.any():
            original_segmentation_map[-1, -1] = 255

        transformed = self.transform(image=original_image, mask=original_segmentation_map)
        image, segmentation_map = transformed['image'], transformed['mask']

        # convert to C, H, W
        image = image.transpose(2, 0, 1)

        return image, segmentation_map, original_image, original_segmentation_map

In [None]:
import albumentations as A


# Per-channel mean and std handed to A.Normalize below; both are 100/255 for every channel.
# NOTE(review): despite the ADE_ prefix these are not visibly derived from ADE20k statistics — confirm.
ADE_MEAN = np.array([100, 100, 100]) / 255
ADE_STD = np.array([100, 100, 100]) / 255

# Training pipeline: random horizontal flip followed by normalisation.
# The resize/crop steps are currently disabled.
train_transform = A.Compose([
    # A.LongestMaxSize(max_size=384),
    # A.RandomCrop(width=100, height=100),
    A.HorizontalFlip(p=0.5),
    A.Normalize(mean=ADE_MEAN, std=ADE_STD),
])

# Test pipeline: normalisation only (no random augmentation).
test_transform = A.Compose([
    # A.Resize(width=100, height=100),
    A.Normalize(mean=ADE_MEAN, std=ADE_STD),
])

# Wrap both splits so that indexing applies the matching transform.
# train_dataset = ImageSegmentationDataset(train_ds)
train_dataset = ImageSegmentationDataset(train_ds, transform=train_transform)
test_dataset = ImageSegmentationDataset(test_ds, transform=test_transform)
# test_dataset = ImageSegmentationDataset(test_ds)

In [None]:
# Sanity-check one transformed training sample (index 222) by printing the shapes
# of the image and mask. This rebinds `image` and `segmentation_map`, which the
# earlier example cells had set.
# image, segmentation_map, _, _ = train_dataset[0]
image, segmentation_map, _, _ = train_dataset[222]
print(image.shape)
print(segmentation_map.shape)

In [None]:
from transformers import MaskFormerImageProcessor,Mask2FormerImageProcessor, AutoImageProcessor

# Create a preprocessor
# (earlier variant, kept for reference)
# preprocessor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-tiny-ade-semantic",
#                                                   do_reduce_labels=False,
#                                                   do_resize=False, do_rescale=False, do_normalize=False)

# NOTE(review): `size` is passed below while do_resize=False, so it presumably has no effect — confirm.
size = {'longest_edge':384, 'shortest_edge':384}

#original
# Resizing and rescaling are switched off; normalisation is left on.
# NOTE(review): the albumentations pipelines already apply A.Normalize, so with
# do_normalize=True the images look like they are normalised twice — confirm this is intended.
preprocessor = MaskFormerImageProcessor(ignore_index=0,
                                        do_reduce_labels=False,
                                        do_resize=False,
                                        do_rescale=False,
                                        do_normalize=True,
                                        size=size)

#my experiment
# preprocessor = Mask2FormerImageProcessor(ignore_index=0, do_reduce_labels=False, do_resize=False, do_rescale=False, do_normalize=False)

In [None]:
from torch.utils.data import DataLoader

def collate_fn(batch):
    """Collate dataset samples into one preprocessed batch.

    Args:
        batch: sequence of ``(image, segmentation_map, original_image,
            original_segmentation_map)`` tuples.

    Returns:
        The object returned by the module-level ``preprocessor`` for the images and
        segmentation maps, with the untouched originals attached under
        ``"original_images"`` and ``"original_segmentation_maps"``.
    """
    # Transpose the list of samples into one tuple per field.
    columns = list(zip(*batch))

    # The preprocessor pads the inputs to a common size and creates a pixel mask.
    # NOTE(review): an earlier note said padding is unnecessary "since we are cropping";
    # whether any cropping is still applied upstream should be confirmed.
    encoded = preprocessor(
        columns[0],
        segmentation_maps=columns[1],
        return_tensors="pt",
    )

    encoded["original_images"] = columns[2]
    encoded["original_segmentation_maps"] = columns[3]
    return encoded

In [None]:

# Settings shared by both loaders.
# (original note: batch size more than 4 causes CUDA out of memory error)
loader_kwargs = dict(batch_size=2, collate_fn=collate_fn, drop_last=True)

# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_dataloader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [None]:
import torch

batch = next(iter(train_dataloader))

# Print the shape of every batch entry: tensors directly,
# other containers via their first element.
for k, v in batch.items():
    shape = v.shape if isinstance(v, torch.Tensor) else v[0].shape
    print(k, shape)

In [None]:
from transformers import MaskFormerForInstanceSegmentation, Mask2FormerForUniversalSegmentation

# Replace the head of the pre-trained model

# model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-base-IN21k-ade-semantic",
#                                                             id2label=id2label,
#                                                             ignore_mismatched_sizes=True)


from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation

# feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b3-finetuned-ade-512-512")
# model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b3-finetuned-ade-512-512")

# model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-base-ade-semantic",
#                                                           id2label=id2label,
#                                                           ignore_mismatched_sizes=True)

# model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-ade",
#                                                           id2label=id2label,
#                                                           ignore_mismatched_sizes=True)

# model = MaskFormerForInstanceSegmentation.from_pretrained("jaygala223/maskformer-finetuned-for-38-cloud-dataset",
#                                                           id2label=id2label,
#                                                           ignore_mismatched_sizes=True)

In [None]:
from transformers import MaskFormerFeatureExtractor, MaskFormerForInstanceSegmentation, AutoImageProcessor, UperNetForSemanticSegmentation
from PIL import Image
import requests

# NOTE(review): `size` is only referenced by the commented-out call below, and
# 'shortest_edge' is 383 here versus 384 in the preprocessor cell — possibly a typo; confirm.
size = {'longest_edge':384, 'shortest_edge':383}

# (stale experiment, kept for reference) load MaskFormer fine-tuned on COCO panoptic segmentation
# feature_extractor = Mask2FormerFeatureExtractor.from_pretrained("facebook/maskformer-swin-base-ade",
#                                                                size=size,
#                                                                )

# model 1 is for generating new pred segmentation maps... model 2 is to train on new pred labels
# `original_model` is model 1 (the already fine-tuned checkpoint);
# `new_model` is model 2, started from the base checkpoint.

image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-swin-tiny")
original_model = UperNetForSemanticSegmentation.from_pretrained("jaygala223/upernet-swin-tiny-finetuned-for-38-cloud-dataset")
# model = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-swin-base")
# NOTE(review): loaded without id2label / ignore_mismatched_sizes, so the head presumably
# keeps the checkpoint's original number of classes — confirm that is intended.
new_model = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-swin-tiny")

In [None]:
def calculate_iou_for_logical_or(pred_mask_list, true_mask_list):
    """Average intersection-over-union between predicted and reference masks.

    Every mask is cast to ``int`` first, so any value that truncates to 0 counts
    as background and everything else as foreground.

    Args:
        pred_mask_list: sequence of predicted mask tensors.
        true_mask_list: sequence (or batched tensor) of reference masks, paired
            element-wise with ``pred_mask_list``.

    Returns:
        The mean IoU as a Python float. A pair whose union is empty contributes
        0.0, and an empty ``pred_mask_list`` yields 0.0.
    """
    num_masks = len(pred_mask_list)
    if num_masks == 0:
        return 0.0

    total_iou = 0.0
    # Pair predictions with the reference masks that were passed in, rather than
    # with whatever global `labels` tensor happens to exist in the notebook.
    for pred_mask, true_mask in zip(pred_mask_list, true_mask_list):
        pred_mask = pred_mask.cpu().int()
        true_mask = true_mask.cpu().int()
        intersection = torch.logical_and(pred_mask, true_mask).sum().item()
        union = torch.logical_or(pred_mask, true_mask).sum().item()

        # An empty union (both masks all-background) contributes 0 instead of 0/0.
        if union:
            total_iou += intersection / union

    return total_iou / num_masks

In [None]:
import torch

# One-batch smoke test of the teacher -> logical-OR -> student pipeline.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()
batch = next(iter(train_dataloader))

images, labels = batch['original_images'], batch['original_segmentation_maps']

# first convert to np array then to tensor... because list to tensor is a slow operation
# Images are passed to image_processor unscaled, exactly as the training and
# evaluation cells do; dividing by 255 here as well would feed the same processor
# differently scaled inputs than the loops it is meant to preview.
images = torch.tensor(np.array(images))
labels = torch.tensor(np.array(labels)) / 255

original_model.to(device)
new_model.to(device)

target_sizes = [(image.shape[0], image.shape[1]) for image in images]

# Nothing is trained in this cell, so no autograd graph is needed.
with torch.no_grad():
    inputs1 = image_processor(images=images, segmentation_maps=labels, return_tensors='pt')
    inputs1 = inputs1.to(device)

    outputs1 = original_model(**inputs1)

    predicted_segmentation_maps = image_processor.post_process_semantic_segmentation(outputs1,
                                                                                      target_sizes=target_sizes)
    pred_maps = torch.stack(predicted_segmentation_maps).to("cpu")

    # Logical OR
    # Only merge the teacher's prediction into the labels when the teacher's loss is high.
    if outputs1.loss.item() > 2.0:
        new_labels = labels.int() | pred_maps.int()
    else:
        new_labels = labels

    inputs2 = image_processor(images=images, segmentation_maps=new_labels, return_tensors='pt')
    inputs2 = inputs2.to(device)

    outputs2 = new_model(**inputs2)

print("done!")
torch.cuda.empty_cache()

In [None]:
# Losses of the fine-tuned model (outputs1) and the new model (outputs2) on the smoke-test batch.
print(outputs1.loss)
print(outputs2.loss)

TRAINING

In [None]:
# UPER NET with LOGICAL OR

import torch
from tqdm.auto import tqdm
from torch.optim.lr_scheduler import StepLR

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = "cuda"
original_model.to(device)
new_model.to(device)
# The teacher only produces pseudo-labels; it is never updated.
original_model.eval()

optimizer = torch.optim.Adam(new_model.parameters(), lr=5e-5)
# Created for experimentation; the scheduler.step() call at the end of this cell is disabled.
scheduler = StepLR(optimizer, step_size=1000, gamma=0.1)

running_loss = 0.0
num_samples = 0

for epoch in range(3):
    print("Epoch:", epoch)
    new_model.train()
    logical_or_counter = 0
    for idx, batch in enumerate(tqdm(train_dataloader)):
        # Reset the parameter gradients
        optimizer.zero_grad()

        # Untransformed images and masks; masks are scaled from {0, 255} to {0, 1}.
        # They stay on the CPU: image_processor builds the device tensors itself.
        images = torch.tensor(np.array(batch['original_images']))
        labels = torch.tensor(np.array(batch['original_segmentation_maps'])) / 255

        target_sizes = [(image.shape[0], image.shape[1]) for image in images]

        # Teacher forward pass. No gradients are needed for it, so skip building
        # the autograd graph (saves GPU memory on every step).
        with torch.no_grad():
            inputs1 = image_processor(images=images, segmentation_maps=labels, return_tensors='pt')
            inputs1 = inputs1.to(device)

            outputs1 = original_model(**inputs1)
            teacher_loss = outputs1.loss.item()

            # generate new labels
            predicted_segmentation_maps = image_processor.post_process_semantic_segmentation(outputs1,
                                                                                              target_sizes=target_sizes)

        pred_maps = torch.stack(predicted_segmentation_maps).to("cpu")

        # Logical OR
        # Merge the teacher's prediction into the labels only when its loss is high.
        if teacher_loss > 2.0:
            new_labels = labels.int() | pred_maps.int()
            logical_or_counter += 1
        else:
            new_labels = labels

        inputs2 = image_processor(images=images, segmentation_maps=new_labels, return_tensors='pt')
        inputs2 = inputs2.to(device)

        outputs2 = new_model(**inputs2)

        # Backward propagation
        loss = outputs2.loss
        loss.backward()

        # Weight the batch loss by the batch size so that dividing by the number
        # of samples seen reports the average of the per-batch losses.
        batch_size = batch["pixel_values"].size(0)
        running_loss += loss.item() * batch_size
        num_samples += batch_size

        if idx % 50 == 0:
            print("Loss:", running_loss/num_samples)

        # Optimization
        optimizer.step()

        torch.cuda.empty_cache()
    print(f"LOGICAL OR PERFORMED {logical_or_counter} times")

#       scheduler.step()

In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub before the next cell pushes a model.
notebook_login()

In [None]:
repo_name = "upernet-swin-tiny-after-logical-OR"
# Push the network trained in the loop above. It is bound to `new_model`;
# no variable named `model` is defined anywhere in this notebook.
new_model.push_to_hub(repo_name)

TESTING

In [None]:
def calculate_iou(pred_mask_list, true_mask_list):
    """Average intersection-over-union between predicted and reference masks.

    Non-zero entries count as foreground (``torch.logical_and`` /
    ``torch.logical_or`` semantics).

    Args:
        pred_mask_list: sequence of predicted mask tensors.
        true_mask_list: sequence of reference mask tensors, paired element-wise
            with ``pred_mask_list``.

    Returns:
        The mean IoU as a Python float. A pair whose union is empty contributes
        0.0 (instead of NaN, which would poison any running average), and an empty
        ``pred_mask_list`` yields 0.0.
    """
    num_masks = len(pred_mask_list)
    if num_masks == 0:
        return 0.0

    total_iou = 0.0
    for pred_mask, true_mask in zip(pred_mask_list, true_mask_list):
        pred_mask = pred_mask.cpu()
        true_mask = true_mask.cpu()
        intersection = torch.logical_and(pred_mask, true_mask).sum().item()
        union = torch.logical_or(pred_mask, true_mask).sum().item()

        # Both masks all-background: count the pair as 0 rather than dividing 0 by 0.
        if union:
            total_iou += intersection / union

    return total_iou / num_masks

In [None]:
import torch

def calculate_metrics(true_labels, predicted_labels, device):
    """Pixel-wise precision, recall, accuracy and F1 for binary masks.

    Counts are accumulated over all pairs before the ratios are taken. Class 1 is
    the positive class and class 0 the negative class; pixels holding any other
    value are not counted.

    Args:
        true_labels: sequence of reference mask tensors.
        predicted_labels: sequence of predicted mask tensors, paired element-wise
            with ``true_labels``.
        device: device both tensors of a pair are moved to before comparison.

    Returns:
        ``(precision, recall, accuracy, f1_score)`` as Python floats. A small
        epsilon in every denominator avoids division by zero.
    """
    true_positives = 0
    false_positives = 0
    false_negatives = 0
    true_negatives = 0

    for true, predicted in zip(true_labels, predicted_labels):
        # Tensor.to returns a new tensor, so the result has to be kept; this also
        # guarantees both operands of the comparisons live on the same device.
        true = true.to(device)
        predicted = predicted.to(device)
        true_positives += torch.sum((predicted == 1) & (true == 1)).item()
        false_positives += torch.sum((predicted == 1) & (true == 0)).item()
        false_negatives += torch.sum((predicted == 0) & (true == 1)).item()
        true_negatives += torch.sum((predicted == 0) & (true == 0)).item()

    precision = true_positives / (true_positives + false_positives + 1e-7)
    recall = true_positives / (true_positives + false_negatives + 1e-7)
    accuracy = (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives + 1e-7)
    f1_score = 2 * (precision * recall) / (precision + recall + 1e-7)

    return precision, recall, accuracy, f1_score

# Example usage
true_labels = [torch.tensor([1, 0, 1]), torch.tensor([1, 1, 0])]
predicted_labels = [torch.tensor([1, 0, 1]), torch.tensor([1, 1, 0])]

# Fall back to the CPU so the example also runs on machines without a GPU.
example_device = "cuda" if torch.cuda.is_available() else "cpu"
precision, recall, accuracy, f1_score = calculate_metrics(true_labels, predicted_labels, example_device)
print("Precision:", precision)
print("Recall:", recall)
print("Accuracy:", accuracy)
print("F1 Score:", f1_score)

In [None]:
# UPERNET evaluation WITH NEW ALGO: LOGICAL OR

from tqdm.auto import tqdm

# Resolve the device before the first .to(device) call so this cell does not
# depend on a `device` left over from an earlier cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
original_model.to(device)
new_model.to(device)
original_model.eval()
new_model.eval()

# Batch indices at which the running averages are printed.
report_at = {5, 10, *range(20, 34), 35, 50, 100, 125, 150, 200}

all_ious = []
precisions, recalls, accuracies, f1_scores = [], [], [], []
for idx, batch in enumerate(tqdm(test_dataloader)):
    if idx in report_at:
        print(f"IDX {idx}: \nprecision", sum(precisions)/ len(precisions))
        print("f1_score", sum(f1_scores)/ len(f1_scores))
        print("recall", sum(recalls)/ len(recalls))
        print("accuracy", sum(accuracies)/ len(accuracies))
        print("mIoU: ", sum(all_ious)/len(all_ious))

    # Evaluate at most the first 201 batches.
    if idx > 200:
        break

    # Untransformed images and masks; masks are scaled from {0, 255} to {0, 1}.
    images = torch.tensor(np.array(batch['original_images']))
    labels = torch.tensor(np.array(batch['original_segmentation_maps'])) / 255

    target_sizes = [(image.shape[0], image.shape[1]) for image in images]

    # Forward pass
    with torch.no_grad():
        inputs1 = image_processor(images=images, segmentation_maps=labels, return_tensors='pt')
        inputs1 = inputs1.to(device)

        outputs1 = original_model(**inputs1)

        # generate new labels
        predicted_segmentation_maps = image_processor.post_process_semantic_segmentation(outputs1,
                                                                                          target_sizes=target_sizes)
        pred_maps = torch.stack(predicted_segmentation_maps).to("cpu")

        # Logical OR
        if outputs1.loss.item() > 0.5:
            new_labels = labels.int() | pred_maps.int()
        else:
            new_labels = labels

        inputs2 = image_processor(images=images, segmentation_maps=new_labels, return_tensors='pt')
        inputs2 = inputs2.to(device)

        outputs2 = new_model(**inputs2)

    # predicted segmentation maps of the new model
    predicted_segmentation_maps = image_processor.post_process_semantic_segmentation(outputs2,
                                                                                      target_sizes=target_sizes)

    # for mean iou calculation... pred maps and labels must be same shape
    # Metrics are computed against the original labels, placed on the same device
    # as the predictions.
    labels_list = [label.to(device) for label in labels]

    all_ious.append(calculate_iou(predicted_segmentation_maps, labels_list))
    precision, recall, accuracy, f1_score = calculate_metrics(labels_list, predicted_segmentation_maps, device)
    precisions.append(precision)
    recalls.append(recall)
    accuracies.append(accuracy)
    f1_scores.append(f1_score)

print("FINAL \nprecision", sum(precisions)/ len(precisions))
print("f1_score", sum(f1_scores)/ len(f1_scores))
print("recall", sum(recalls)/ len(recalls))
print("accuracy", sum(accuracies)/ len(accuracies))
print("mIoU: ", sum(all_ious)/len(all_ious))

In [None]:
#last was logical or at loss1 > 2.0 but bigger train set
precision 0.7294630047815394
f1_score 0.7495887245112265
recall 0.7860055675079148
accuracy 0.9768834189759833
mIoU:  0.4203343279659748

In [None]:
# upernet evaluation with SIMPLY NEW LABELS (in training and testing)

from tqdm.auto import tqdm

# Fall back to the CPU when no GPU is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model.to(device)
original_model.to(device)
new_model.eval()
original_model.eval()

# Batch indices at which the running averages are printed.
report_at = {5, 10, 25, 50, 100, 125, 150, 200}

all_ious = []
precisions, recalls, accuracies, f1_scores = [], [], [], []
for idx, batch in enumerate(tqdm(test_dataloader)):
    if idx in report_at:
        print(f"IDX {idx}: \nprecision", sum(precisions)/ len(precisions))
        print("f1_score", sum(f1_scores)/ len(f1_scores))
        print("recall", sum(recalls)/ len(recalls))
        print("accuracy", sum(accuracies)/ len(accuracies))
        print("mIoU: ", sum(all_ious)/len(all_ious))

    # Evaluate at most the first 201 batches.
    if idx > 200:
        break

    # Untransformed images and masks; masks are scaled from {0, 255} to {0, 1}.
    images = torch.tensor(np.array(batch['original_images']))
    labels = torch.tensor(np.array(batch['original_segmentation_maps'])) / 255

    target_sizes = [(image.shape[0], image.shape[1]) for image in images]

    # Forward pass
    with torch.no_grad():
        inputs1 = image_processor(images=images, segmentation_maps=labels, return_tensors='pt')
        inputs1 = inputs1.to(device)

        outputs1 = original_model(**inputs1)

        # generate new labels
        predicted_segmentation_maps = image_processor.post_process_semantic_segmentation(outputs1,
                                                                                          target_sizes=target_sizes)

        # Here the fine-tuned model's predictions replace the labels outright.
        new_labels = torch.stack(predicted_segmentation_maps)

        # forward pass on new labels
        inputs2 = image_processor(images=images, segmentation_maps=new_labels, return_tensors='pt')
        inputs2 = inputs2.to(device)

        outputs2 = new_model(**inputs2)

    # predicted segmentation maps of the new model
    predicted_segmentation_maps = image_processor.post_process_semantic_segmentation(outputs2,
                                                                                      target_sizes=target_sizes)

    # for mean iou calculation... pred maps and labels must be same shape
    # Metrics are computed against the original labels, placed on the same device
    # as the predictions.
    labels_list = [label.to(device) for label in labels]

    all_ious.append(calculate_iou(predicted_segmentation_maps, labels_list))
    precision, recall, accuracy, f1_score = calculate_metrics(labels_list, predicted_segmentation_maps, device)
    precisions.append(precision)
    recalls.append(recall)
    accuracies.append(accuracy)
    f1_scores.append(f1_score)

print("FINAL \nprecision", sum(precisions)/ len(precisions))
print("f1_score", sum(f1_scores)/ len(f1_scores))
print("recall", sum(recalls)/ len(recalls))
print("accuracy", sum(accuracies)/ len(accuracies))
print("mIoU: ", sum(all_ious)/len(all_ious))

In [None]:
# upernet-swin tiny (tuned)
precision 0.9392357746591729
f1_score 0.8274812357961276
recall 0.8020496805250736
accuracy 0.9476910215432391
mIoU:  0.4426152956756679

# upernet-swin tiny (tuned)
precision 0.8683672456144808
f1_score 0.7479758424948
recall 0.7284804817339592
accuracy 0.9722921968707247
mIoU:  0.5187820670279589

# Upernet with LOGICAL OR (OR applied in both training and testing)
At 20% training loss
precision 0.8409011146308205
f1_score 0.8532656625558744
recall 0.8737207014226013
accuracy 0.9673955127443059
mIoU:  0.4893365881659768

At 12% training loss
precision 0.8699746802445684
f1_score 0.8642844356050445
recall 0.8592141353492534
accuracy 0.9738575328476813
mIoU:  0.5058126130395315

At 8% training loss
precision 0.8674168711460868
f1_score 0.8635056062496748
recall 0.8602445691426681
accuracy 0.9734111747353077
mIoU:  0.5060400659726425


# Upernet with LOGICAL OR TWICE(OR applied in both training and testing)
at 40% training loss
precision 0.8800218235572824
f1_score 0.8670481177351661
recall 0.8570141818017505
accuracy 0.9781614361383997
mIoU:  0.512333486567844


# Upernet with LOGICAL OR (OR applied only in training and not in testing)
At 20% training loss
precision 0.8056717629325668
f1_score 0.8335686847209157
recall 0.8731451883300031
accuracy 0.9480223992855058
mIoU:  0.467702101577412

At 12% training loss
precision 0.8366282128662313
f1_score 0.8486793148867103
recall 0.8624495891429831
accuracy 0.9583616931025791
mIoU:  0.4875807930630716


# upernet swin large (untuned)
precision 0.04674390520428237
f1_score 0.05677240963341936
recall 0.07227933222051418
accuracy 0.1463044294935711
mIoU:  0.33444785990053044

#maskformer tuned
precision 0.7754642685034004
f1_score 0.7528422777953474
recall 0.7397811585765738
accuracy 0.9775063871126577
mIoU:  0.7168471569364722

precision 0.5217402294425123
f1_score 0.5894180085336521
recall 0.999999981816197
accuracy 0.5217402294425123
mIoU:  0.521740224174788

* precision 0.3111225918183273
* f1_score 0.3191659886291905
* recall 0.3295510091217397
* accuracy 0.9829524113575062
* mIoU:  0.29805679046190703

* precision 0.26088595055841357
* f1_score 0.29013017962476445
* recall 0.3274049580908574
* accuracy 0.988511827256274
* mIoU:  0.25725841522216797

