In [32]:
import torch
import os
from PIL import Image
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import torch.nn as nn
import torchvision.models as models
import matplotlib.pyplot as plt
import numpy as np
from torch import optim
from tqdm.auto import tqdm

In [33]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [34]:
!unzip /content/drive/MyDrive/AIP/A2/PascalVOC.zip

Archive:  /content/drive/MyDrive/AIP/A2/PascalVOC.zip
replace PascalVOC/.DS_Store? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


In [117]:
model = models.mobilenet_v2(pretrained=True)

# Define a new classifier
num_classes = 21

# Encoder
features = nn.Sequential(*list(model.children())[:-1])                # Extract layers up to and excluding the classifier


# Decoder
decoder = nn.Sequential(
    nn.ConvTranspose2d(1280, 320, kernel_size=3, stride=2, padding=1, output_padding=1, bias=False),
    nn.BatchNorm2d(320),
    nn.ReLU(),
    nn.ConvTranspose2d(320, 96, kernel_size=3, stride=2, padding=1, output_padding=1, bias=False),
    nn.BatchNorm2d(96),
    nn.ReLU(),
    nn.ConvTranspose2d(96, 32, kernel_size=3, stride=2, padding=1, output_padding=1, bias=False),
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1, bias=False),
    nn.BatchNorm2d(16),
    nn.ReLU(),
    nn.ConvTranspose2d(16, 21, kernel_size=3, stride=2, padding=1, output_padding=1, bias=False),
    nn.Sigmoid()
)

# Combining both:
########################################

model = nn.Sequential(features, decoder)


########################################

In [69]:
def ch3_to_ch1(img):

  num_color = np.unique(img.reshape(-1, img.shape[2]), axis = 0)

  generated_img = np.zeros(img.shape[:2]).astype(np.uint8)

  for col in num_color:
    
    locs = np.where(np.all(img==col,axis=2))

    try:
      generated_img[locs[0], locs[1]] = np.where(np.all(colors == col, axis=1))[0][0]
      
    except Exception as e:
      pass
  return generated_img


In [70]:
class ImgDataset(Dataset):
  def __init__(self, img_dir, mask_dir, transform=None):
    self.img_dir = img_dir
    self.mask_dir = mask_dir
    self.transform = transform
    self.images = os.listdir(mask_dir)
    
  def __len__(self):
    return len(self.images)
  
  def __getitem__(self, index):
    img_path = os.path.join(self.img_dir, self.images[index].replace(".png", ".jpg"))
    mask_path = os.path.join(self.mask_dir, self.images[index].replace(".png", ".png"))
    image = np.array(Image.open(img_path).convert("RGB"))
    mask = np.array(Image.open(mask_path).convert("RGB"))
    mask[mask == 255.0] = 1.0
    
    if self.transform is not None:
        # transform_img = transforms.Compose([self.transform, transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
        # transform_label = transforms.Compose([self.transform])

        transform_img = transforms.Compose([transforms.ToTensor(),
                                        transforms.Resize((224, 224)),
                                                          transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
        image = transform_img(image)
        
        mask = ch3_to_ch1(mask)
        transform_mask = transforms.Compose([transforms.ToTensor(),
                                        transforms.Resize((224, 224))])
        mask = transform_mask(mask)

    return image, mask



In [71]:
# define transformations

transform_image = transforms.Compose([ transforms.ToTensor(),
                                transforms.Resize((224, 224))])


# create the train and test datasets

trainDS = ImgDataset(img_dir="/content/PascalVOC/trainval/Images", mask_dir="/content/PascalVOC/trainval/Annotations",
	transform=transform_image)
testDS = ImgDataset(img_dir="/content/PascalVOC/test/Images", mask_dir="/content/PascalVOC/test/Annotations",
    transform=transform_image)

print(f"found {len(trainDS)} examples in the training set...")
print(f"found {len(testDS)} examples in the test set...")

# create the training and test data loaders
trainLoader = DataLoader(trainDS, shuffle=True, batch_size = 8)
testLoader = DataLoader(testDS, shuffle=False,	batch_size = 8)

found 422 examples in the training set...
found 210 examples in the test set...


In [72]:
len(trainLoader)
trainLoader

<torch.utils.data.dataloader.DataLoader at 0x7ff922e7aca0>

In [118]:
def train_MobileNetV2(model, trainLoader):
  epochs = 25
  params_to_update = []
  for name,param in model.named_parameters():
    if param.requires_grad == True:
      params_to_update.append(param)
      # print("Parameters to Update by optimizer:",name)
      
  optimizer = optim.Adam(params_to_update, lr=0.001)

  criterion = nn.CrossEntropyLoss(ignore_index=255)

  model = model.to("cuda")
  for epoch in range(epochs):
    running_loss = 0.00
    running_corrects = 0

    for inputs, masks in trainLoader:
      inputs = inputs.to("cuda")
      masks = masks.to("cuda")

      optimizer.zero_grad()   # zero the parameter gradients at the start of training loop
      
      outputs = model(inputs)

      masks = masks.clone().squeeze().detach().long()
      outputs = outputs.type(torch.float64)

      loss = criterion(outputs, masks)
      
      loss.backward()
      optimizer.step()
    
      print(f"training loss: {loss.item():.5f}")

  return model

modelV2 = train_MobileNetV2(model, trainLoader)

training loss: 3.04173
training loss: 3.03322
training loss: 3.02451
training loss: 3.01626
training loss: 3.00852
training loss: 3.00096
training loss: 2.99340
training loss: 2.98600
training loss: 2.97863
training loss: 2.97111
training loss: 2.96344
training loss: 2.95589
training loss: 2.94809
training loss: 2.94025
training loss: 2.93251
training loss: 2.92440
training loss: 2.91633
training loss: 2.90820
training loss: 2.89992
training loss: 2.89155
training loss: 2.88315
training loss: 2.87476
training loss: 2.86628
training loss: 2.85781
training loss: 2.84930
training loss: 2.84074
training loss: 2.83225
training loss: 2.82373
training loss: 2.81525
training loss: 2.80680
training loss: 2.79836
training loss: 2.78998
training loss: 2.78159
training loss: 2.77333
training loss: 2.76511
training loss: 2.75689
training loss: 2.74876
training loss: 2.74073
training loss: 2.73268
training loss: 2.72473
training loss: 2.71689
training loss: 2.70909
training loss: 2.70134
training lo

In [119]:
# Evaluation

# Create a color map for the segmentation labels
colors = np.array([ (0, 0, 0),  # background
               (128, 0, 0), # aeroplane
               (0, 128, 0), # bicycle
               (128, 128, 0), # bird
               (0, 0, 128), # boat
               (128, 0, 128), # bottle
               (0, 128, 128), # bus 
               (128, 128, 128), # car
               (64, 0, 0), # cat
               (192, 0, 0), # chair
               (64, 128, 0), # cow
               (192, 128, 0), # dining table
               (64, 0, 128), # dog
               (192, 0, 128), # horse
               (64, 128, 128), # motorbike
               (192, 128, 128), # person
               (0, 64, 0), # potted plant
               (128, 64, 0), # sheep
               (0, 192, 0), # sofa
               (128, 192, 0), # train
               (0, 64, 128)]) #



In [120]:
import cv2
def metric_calculation(image_path, mask_path, model):
  # Load and preprocess the image
  img = cv2.imread(image_path)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

  img_tensor = transforms.ToTensor()(img)
  img_tensor = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(img_tensor)
  img_tensor = img_tensor.unsqueeze(0).to("cuda")

  # Make the prediction
  model.eval()
  with torch.no_grad():
      output = model(img_tensor)
      
  output = output.argmax(1).squeeze().detach().cpu().numpy()

  colored_output = colors[output]

  output = colored_output

  # Load the ground truth label
  label = cv2.imread(mask_path)
  label = cv2.cvtColor(label, cv2.COLOR_BGR2RGB)
  label = np.resize(label, output.shape)

  # Calculate pixel accuracy
  pixel_acc = (output == label).astype(int).sum() / label.size


  # Calculate mIoU
  iou = []
  for class_id in colors:
      true_positives = ((output == class_id) & (label == class_id)).sum()
      false_positives = ((output == class_id) & (label != class_id)).sum()
      false_negatives = ((output != class_id) & (label == class_id)).sum()

      if true_positives + false_positives + false_negatives == 0:
          iou.append(0)
      else:
          iou.append(true_positives / (true_positives + false_positives + false_negatives))

  miou = np.mean(iou)

  return pixel_acc, miou


In [121]:
image_path = "/content/PascalVOC/test/Images/000068.jpg"
mask_path = "/content/PascalVOC/test/Annotations/000068.png"
print(metric_calculation(image_path, mask_path, model))

(0.9216274685329862, 0.7682738157040111)


In [122]:
annotation_name = []
for a, b, c in os.walk("/content/PascalVOC/test/Annotations"):
  annotation_name = c
anno = []
for string in annotation_name:
  anno1 = string.replace(".png", "")
  anno.append(anno1)

In [123]:
pixel_acc_cumulative = 0
mIoU_cumulative = 0
for image_name in tqdm(anno):
  a, b = metric_calculation(f"/content/PascalVOC/test/Images/{image_name}.jpg",f"/content/PascalVOC/test/Annotations/{image_name}.png", model)
  pixel_acc_cumulative = pixel_acc_cumulative + a
  mIoU_cumulative = mIoU_cumulative + b
print(f"Average Pixel Accuracy over Test set: {pixel_acc_cumulative/len(anno):.5f}")
print(f"Average mIoU over Test set: {mIoU_cumulative/len(anno):.5f}")

  0%|          | 0/210 [00:00<?, ?it/s]

Average Pixel Accuracy over Test set: 0.76564
Average mIoU over Test set: 0.61378
