In [None]:
# default_exp instance_segmentation

# Instance segmentation

> Instance segmentation model

In [None]:
# export

from pathlib import Path
from typing import List, Tuple, Union, Optional, Dict, Set

In [None]:
# exporti


import numpy as np
import os
from datetime import datetime
import torch
import torch.utils.data
from torch.hub import download_url_to_file
import torchvision
import PIL
from PIL import Image
from zipfile import ZipFile

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

In [None]:
#exporti

# Fetch the TorchVision repository once and copy the detection reference
# helpers (references/detection) next to this notebook so they can be imported
# as plain modules below.
# NOTE(review): the clone is not pinned to a tag, so the copied helpers follow
# the repository's default branch — confirm they match the installed
# torchvision version.

if not Path("vision").exists():

    !git clone https://github.com/pytorch/vision.git

    # The copy only happens together with the clone: if ./vision already exists
    # the helper files are assumed to be present in the working directory.
    !cp vision/references/detection/utils.py .
    !cp vision/references/detection/transforms.py .
    !cp vision/references/detection/coco_eval.py .
    !cp vision/references/detection/engine.py .
    !cp vision/references/detection/coco_utils.py .

# Import the helpers copied above (these are local files, not installed packages).
from engine import train_one_epoch, evaluate
import transforms as T
import utils

In [None]:
# Record the versions of the core libraries so the run can be reproduced.
for m in [np, torch, torchvision, PIL]:
    print(f"{m.__name__:12}: {m.__version__}")

numpy       : 1.18.5
torch       : 1.7.1
torchvision : 0.8.2
PIL         : 7.2.0


## Download data

In [None]:
# Locations of the extracted dataset, the downloaded archive and its source URL.
dataset_root = Path("./data/dolphins_200")
dataset_zip = dataset_root.parent / "dolphins_200.zip"
dataset_url = "https://s3.eu-central-1.amazonaws.com/ai-league.cisex.org/2020-2021/dolphins-instance-segmentation/dolphins_200.zip"

# Make sure the directory that will hold the archive exists.
dataset_zip.parent.mkdir(parents=True, exist_ok=True)

# Download the archive only when it is not already on disk.
if not dataset_zip.exists():
    torch.hub.download_url_to_file(
        dataset_url,
        dataset_zip,
        hash_prefix=None,
        progress=True,
    )
    

# The archive is (re-)extracted on every run of this cell.
with ZipFile(dataset_zip, 'r') as zip_ref:
    zip_ref.extractall(dataset_root)
    
# List the extracted top-level folders as a quick sanity check.
!ls -lh {dataset_root}

total 36K
drwxr-xr-x 2 sharath sharath 12K Dec 14 14:13 JPEGImages/
drwxr-xr-x 2 sharath sharath 12K Dec 14 14:13 SegmentationClass/
drwxr-xr-x 2 sharath sharath 12K Dec 14 14:13 SegmentationObject/


## Dataset

In [None]:
# exporti


def _enumerate_colors_for_fname(fname: Path) -> List[Tuple[int, int, int]]:
    """Return the distinct pixel colours found in one image file.

    Args:
        fname: path of the image to inspect.

    Returns:
        One entry per distinct pixel value, in the order reported by
        ``Image.getcolors`` with the pixel counts dropped.
        NOTE(review): assumes the file is an RGB image so that every entry is
        an ``(r, g, b)`` tuple — the caller relies on 3-component colours.
    """
    # The context manager releases the file handle as soon as the colours
    # have been read.
    with Image.open(fname) as img:
        # ``getcolors`` returns None when the image holds more distinct colours
        # than ``maxcolors`` (256 by default). Allowing one colour per pixel
        # means the limit can never be exceeded.
        width, height = img.size
        color_counts = img.getcolors(maxcolors=width * height)
    return [color for _count, color in color_counts]

In [None]:
# export


def enumerate_colors_for_fnames(
    fnames: List[Path],
) -> Dict[Tuple[int, int, int], int]:
    """Build a colour -> integer code lookup over a collection of mask images.

    Black ``(0, 0, 0)`` is always pinned to code 0; every other colour found
    in any of the files receives a consecutive code starting at 1.

    Args:
        fnames: paths of the images whose colours should be enumerated.

    Returns:
        Dictionary mapping each colour tuple to its integer code.
    """
    distinct = set()
    for fname in fnames:
        # Colours are gathered file by file: different files may hold a
        # different number of colours, so the per-file lists cannot be stacked
        # into one rectangular array.
        distinct.update(tuple(color) for color in _enumerate_colors_for_fname(fname))

    # Black is re-inserted explicitly at the front below.
    distinct.discard((0, 0, 0))

    # Sorting makes the colour -> code assignment reproducible between runs
    # instead of depending on set iteration order.
    palette = [(0, 0, 0)] + sorted(distinct)
    return {color: code for code, color in enumerate(palette)}

In [None]:
# exporti


def _substitute_values(xs: np.array, x, y):
    """Not sure I understand what this does"""
    ix_x = xs == x
    ix_y = xs == y
    xs[ix_x] = y
    xs[ix_y] = x

In [None]:
# export


def enumerate_image_for_instances(
    im: Image, force_black_to_zero: bool = True, max_colors=16
) -> np.array:
    """Turn a colour-coded instance mask into an array of palette indices.

    The image is quantised to an adaptive palette of at most ``max_colors``
    entries, so every pixel becomes a small integer index.

    When ``force_black_to_zero`` is set, index 0 is swapped with the largest
    index present in the array.
    NOTE(review): this presumably relies on black receiving the highest
    palette index after quantisation — confirm against the data.
    """
    indexed = np.array(im.convert("P", palette=Image.ADAPTIVE, colors=max_colors))

    if not force_black_to_zero:
        return indexed

    _substitute_values(indexed, 0, indexed.max())
    return indexed

In [None]:
# export


def enumerate_image_for_classes(
    im: "Image.Image",
    colors: Optional[Dict[Tuple[int, ...], int]] = None,
) -> np.ndarray:
    """Convert a colour-coded class mask into an array of integer class codes.

    Args:
        im: image (or any array-like of shape ``(..., channels)``) whose pixel
            colours encode classes.
        colors: mapping from pixel colour to class code. Required, although
            the signature keeps ``None`` as default for compatibility.

    Returns:
        ``uint8`` array with the channel axis removed. Each pixel holds the
        code of its colour, or 0 when the colour is not in ``colors``.

    Raises:
        ValueError: if ``colors`` is not provided.
    """
    if colors is None:
        raise ValueError("colors must be a mapping from pixel colour to class code")

    pixels = np.asarray(im)

    # Accumulate into a single integer array instead of materialising one
    # full-size array per colour. An empty mapping simply yields all zeros.
    codes = np.zeros(pixels.shape[:-1], dtype=int)
    for color, code in colors.items():
        # A pixel matches only when every channel equals the colour.
        codes[(pixels == color).all(axis=-1)] += code

    return codes.astype("uint8")

In [None]:
# exporti


class DolphinsDataset(torch.utils.data.Dataset):
    """Instance-segmentation dataset read from a VOC-style folder layout.

    ``root`` must contain three sub-folders: ``JPEGImages`` (input images),
    ``SegmentationClass`` (colour-coded class masks) and ``SegmentationObject``
    (colour-coded instance masks). Files are paired by their position in the
    sorted directory listings.

    Each item is an ``(image, target)`` pair where ``target`` is a dict with
    the keys ``boxes``, ``labels``, ``masks``, ``image_id``, ``area`` and
    ``iscrowd``.
    """

    def __init__(self, root, transforms=None):
        """Index the files below ``root`` and build the class colour lookup.

        Args:
            root: dataset directory containing the three sub-folders.
            transforms: optional callable applied as
                ``transforms(image, target)`` to every sample.
        """
        self.root = root
        self.transforms = transforms
        # load all image files, sorting them to
        # ensure that they are aligned
        self.imgs = list(sorted(os.listdir(os.path.join(root, "JPEGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "SegmentationClass"))))
        self.instances = list(
            sorted(os.listdir(os.path.join(root, "SegmentationObject")))
        )

        # colour -> class code lookup shared by all samples
        fnames = [
            os.path.join(self.root, "SegmentationClass", mask) for mask in self.masks
        ]
        self.class_colors = enumerate_colors_for_fnames(fnames)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as ``(image, target)``."""
        # load images and masks
        img_path = os.path.join(self.root, "JPEGImages", self.imgs[idx])
        label_path = os.path.join(self.root, "SegmentationClass", self.masks[idx])
        mask_path = os.path.join(self.root, "SegmentationObject", self.instances[idx])

        img = Image.open(img_path).convert("RGB")

        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        mask_img = Image.open(mask_path)

        mask = enumerate_image_for_instances(mask_img)

        # instances are encoded as different colors
        obj_ids = np.unique(mask)

        # the smallest id is taken to be the background, so remove it
        obj_ids = obj_ids[1:]

        # split the color-encoded mask into a set
        # of binary masks, shape (num_objs, H, W)
        masks = mask == obj_ids[:, None, None]

        label_img = Image.open(label_path)
        label_array = enumerate_image_for_classes(label_img, self.class_colors)

        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        labels = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])

            # class codes found under this instance: background (0) plus at
            # most one real class, otherwise the annotation is inconsistent
            class_mask = label_array * masks[i]
            label = np.unique(class_mask)
            assert label.shape[0] <= 2
            label = max(label)
            labels.append(label)

        # reshape keeps the (N, 4) layout even when the image has no
        # instances; a plain empty list would become a 1-D tensor and break
        # the column indexing used for ``area`` below
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        # there WAS multi class; the per-instance labels computed above are
        # currently ignored and every instance gets class 1
        # labels = torch.as_tensor(labels, dtype=torch.int64)
        labels = torch.ones((num_objs,), dtype=torch.int64)

        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of samples, i.e. the number of files in ``JPEGImages``."""
        return len(self.imgs)

In [None]:
# Instantiate the dataset without transforms (samples stay PIL images).
dataset = DolphinsDataset(dataset_root)

## Model

In [None]:
# exporti


def get_instance_segmentation_model(num_classes, hidden_layer=256, pretrained=True):
    """Create a Mask R-CNN (ResNet-50 FPN) with heads sized for ``num_classes``.

    Args:
        num_classes: number of output classes, background included.
        hidden_layer: number of channels of the intermediate layer in the new
            mask predictor (256 reproduces the previous hard-coded value).
        pretrained: forwarded to ``maskrcnn_resnet50_fpn``; ``True`` loads the
            COCO pre-trained weights as before.

    Returns:
        The model with its box and mask predictors replaced by freshly
        initialised ones.
    """
    # load an instance segmentation model (pre-trained on COCO by default)
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=pretrained
    )  # box_score_thresh=0.5

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, hidden_layer, num_classes
    )

    return model

In [None]:
class MyColorJitter:
    """Colour-jitter transform following the ``(image, target)`` call convention.

    Wraps ``torchvision.transforms.ColorJitter``: only the image is altered,
    the target is handed back untouched.
    """

    def __init__(self, brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5):
        jitter_kwargs = dict(
            brightness=brightness, contrast=contrast, saturation=saturation, hue=hue
        )
        self.torch_color_jitter = torchvision.transforms.ColorJitter(**jitter_kwargs)

    def __call__(self, image, target):
        return self.torch_color_jitter(image), target

#### Hint: incorporate more transformation classes such as RandomCrop etc. (https://pytorch.org/docs/stable/torchvision/transforms.html)

In [None]:
# exporti


def get_transform(train):
    """Compose the transform pipeline for the training or evaluation split.

    The image is always converted to a tensor first. When ``train`` is truthy
    a random horizontal flip (p=0.5) and a colour jitter are appended.
    """
    # converts the image, a PIL image, into a PyTorch Tensor
    steps = [T.ToTensor()]
    if train:
        # augmentation is applied during training only
        steps += [
            T.RandomHorizontalFlip(0.5),
            MyColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
        ]
        # TODO: add additional transforms: e.g. random crop
    return T.Compose(steps)

In [None]:
batch_size = 4

# use our dataset and defined transformations; two instances over the same
# files so that the validation subset is not augmented
dataset = DolphinsDataset(dataset_root, get_transform(train=True))
dataset_test = DolphinsDataset(dataset_root, get_transform(train=False))

# fraction of the samples held out for validation (at least one sample)
val_split = 0.2
n_val = max(1, round(val_split * len(dataset)))

# split the dataset in train and test set; the fixed seed makes the
# permutation, and therefore the split, reproducible
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
# the last n_val shuffled indices form the validation subset, the rest train
dataset = torch.utils.data.Subset(dataset, indices[:-n_val])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-n_val:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

In [None]:
# train on the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# our dataset has two classes only - background and dolphin
num_classes = 2

# get the model using our helper function
model = get_instance_segmentation_model(num_classes)
# move model to the right device
model.to(device)

# construct an optimizer over the trainable parameters only
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which multiplies the learning rate by 0.1
# when epochs 10 and 35 are reached
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=[10, 35], gamma=0.1
)  # alternatives tried: StepLR(optimizer, step_size=10, gamma=0.1), MultiStepLR(optimizer, milestones=[10,20], gamma=0.1)

In [None]:
# slow

# number of training epochs (kept small here; with 5 epochs the scheduler
# milestones at 10 and 35 are never reached)
num_epochs = 5

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)



Epoch: [0]  [ 0/40]  eta: 0:00:46  lr: 0.000133  loss: 2.1840 (2.1840)  loss_classifier: 0.6545 (0.6545)  loss_box_reg: 0.2684 (0.2684)  loss_mask: 1.2175 (1.2175)  loss_objectness: 0.0323 (0.0323)  loss_rpn_box_reg: 0.0112 (0.0112)  time: 1.1626  data: 0.6623  max mem: 4474
Epoch: [0]  [10/40]  eta: 0:00:16  lr: 0.001414  loss: 1.3183 (1.4456)  loss_classifier: 0.3435 (0.4040)  loss_box_reg: 0.2955 (0.2960)  loss_mask: 0.5938 (0.6859)  loss_objectness: 0.0144 (0.0363)  loss_rpn_box_reg: 0.0187 (0.0234)  time: 0.5365  data: 0.0662  max mem: 4751
Epoch: [0]  [20/40]  eta: 0:00:10  lr: 0.002695  loss: 1.1231 (1.1813)  loss_classifier: 0.2340 (0.2956)  loss_box_reg: 0.2788 (0.2788)  loss_mask: 0.4567 (0.5209)  loss_objectness: 0.0303 (0.0438)  loss_rpn_box_reg: 0.0187 (0.0422)  time: 0.4744  data: 0.0072  max mem: 4751
Epoch: [0]  [30/40]  eta: 0:00:04  lr: 0.003975  loss: 0.7580 (1.0346)  loss_classifier: 0.1500 (0.2448)  loss_box_reg: 0.2493 (0.2768)  loss_mask: 0.2966 (0.4401)  loss_ob

In [None]:
# skip
# pick one image from the test set (the target is not needed here)
img, _ = dataset_test[0]
# put the model in evaluation mode
model.eval()
# gradients are not needed for inference
with torch.no_grad():
    prediction = model([img.to(device)])
# show the raw prediction (a list with one dict per input image)
prediction

[{'boxes': tensor([[222.4442, 225.1240, 343.3164, 273.7512],
          [344.1282, 244.0352, 466.5335, 279.8188],
          [334.6013, 226.7830, 396.2329, 258.0442],
          [288.2923, 226.9448, 342.5175, 254.3230],
          [390.7780, 243.1464, 426.8419, 259.4730],
          [355.5486, 243.7918, 426.3789, 261.8110],
          [350.9827, 228.3645, 417.8277, 262.1019],
          [297.2738, 229.0661, 376.3158, 256.3553],
          [375.0706, 234.6990, 425.1687, 260.2922],
          [362.1086, 226.0025, 395.6159, 255.3446],
          [314.8678, 233.2699, 425.9194, 265.0935],
          [244.8049, 252.0540, 354.5661, 274.0536],
          [297.9895, 227.4242, 492.5534, 279.9767],
          [338.8068, 243.0577, 424.5018, 272.2073],
          [326.0295, 242.8014, 368.2765, 256.0810],
          [307.6323, 238.2938, 388.3889, 260.2543],
          [382.7429, 243.5956, 435.1300, 271.1403],
          [253.9753, 260.2061, 311.5323, 273.3116],
          [292.7706, 245.9255, 372.9172, 257.2255],
   

In [None]:
# exporti


def _show_pred(dataset_test, n=None, score_threshold=0.9):
    """Display input images together with their confidently predicted masks.

    Uses the notebook-level ``model`` and ``device``.
    NOTE(review): ``display`` is expected to be provided by the notebook
    environment — confirm before using this outside of Jupyter.

    Args:
        dataset_test: dataset yielding ``(image_tensor, target)`` pairs.
        n: number of leading samples to show; ``None`` shows all of them.
        score_threshold: only masks whose score is strictly above this value
            are displayed (0.9 reproduces the previous hard-coded value).
    """
    if n is None:
        n = len(dataset_test)

    # Switching to evaluation mode once is enough for all samples.
    model.eval()

    for sample_idx in range(n):
        img = dataset_test[sample_idx][0]
        # The tensor is scaled by 255 and moved to channel-last layout for PIL.
        display(
            Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy()).resize(
                (300, 240)
            )
        )
        with torch.no_grad():
            prediction = model([img.to(device)])
        predicted_masks = prediction[0]["masks"]
        scores = prediction[0]["scores"]

        # A separate index name keeps the outer sample index intact.
        for mask_idx in range(predicted_masks.shape[0]):
            score = scores[mask_idx]
            if score > score_threshold:
                print(f"Mask for score {score:.1%}")
                display(
                    Image.fromarray(
                        predicted_masks[mask_idx, 0].mul(255).byte().cpu().numpy()
                    ).resize((300, 240))
                )
            else:
                print(f"Skipping mask for score {score:.1%}")


# Visualise the predictions for every sample of the held-out subset.
_show_pred(dataset_test)

In [None]:
# exporti


def _save_model_with_timestamp(
    model, save_path="/work/data/dupini/processed/body_100_resized/"
):
    save_date_path = (
        save_path + "model" + datetime.now().strftime("-%Y-%m-%d-%H-%M-%S") + ".pt"
    )
    print(save_date_path)
    torch.save(model.state_dict(), save_date_path)

In [None]:
# _save_model_with_timestamp(model)

## Calculate metrics

In [None]:
## todo

# ----- Both IoU and Dice metrics are implemented below, but only IoU is used in the end, since it is the most commonly preferred metric for segmentation

In [None]:
def iou_metric(binary_segmentation: np.ndarray, binary_gt_label: np.ndarray) -> float:
    """
    Compute the IOU between two binary segmentations (typically one ground truth and a predicted one).
    Input:
        binary_segmentation: binary 2D numpy array (boolean or integer dtype) representing the region of interest as segmented by the algorithm
        binary_gt_label: binary 2D numpy array (boolean or integer dtype) representing the region of interest as provided in the database
    Output:
        IOU: smoothed IOU between the segmentation and the ground truth;
             two empty masks give 1.0
    Raises:
        AssertionError: if an input is neither of boolean nor of integer dtype
    """

    binary_segmentation = np.asarray(binary_segmentation)
    binary_gt_label = np.asarray(binary_gt_label)

    # ``np.int`` / ``np.bool`` no longer exist in current NumPy releases, so the
    # dtype check is expressed with the builtin ``bool`` and the abstract
    # integer type; this also accepts integer masks of any width (e.g. uint8).
    for mask in (binary_segmentation, binary_gt_label):
        assert mask.dtype == bool or np.issubdtype(
            mask.dtype, np.integer
        ), f"expected a boolean or integer mask, got dtype {mask.dtype}"

    # turn all variables to booleans, just in case
    binary_segmentation = binary_segmentation.astype(bool)
    binary_gt_label = binary_gt_label.astype(bool)

    # count the pixels in the intersection and in the union
    intersection = float(np.count_nonzero(np.logical_and(binary_segmentation, binary_gt_label)))
    union = float(np.count_nonzero(np.logical_or(binary_segmentation, binary_gt_label)))

    # compute the IOU; the smoothing term avoids a division by zero when both
    # masks are empty
    smooth = 0.001
    IOU = (intersection + smooth) / (union + smooth)

    return IOU

NameError: name 'np' is not defined

In [None]:
def dice_coefficient(binary_segmentation, binary_gt_label):
    """
    Compute the Dice coefficient between two binary segmentations.
    Dice coefficient is defined as here: https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
    Input:
        binary_segmentation: binary 2D numpy array representing the region of interest as segmented by the algorithm
        binary_gt_label: binary 2D numpy array representing the region of interest as provided in the database
    Output:
        dice_value: Dice coefficient between the segmentation and the ground truth;
                    1.0 when both masks are empty
    """

    # turn all variables to booleans, just in case (``np.bool`` no longer
    # exists in current NumPy releases, the builtin ``bool`` is equivalent)
    binary_segmentation = np.asarray(binary_segmentation, dtype=bool)
    binary_gt_label = np.asarray(binary_gt_label, dtype=bool)

    # count the number of True pixels in the intersection
    intersection = float(
        np.count_nonzero(np.logical_and(binary_segmentation, binary_gt_label))
    )

    # total number of True pixels in the segmentation and in the ground truth
    total_pixels = float(
        np.count_nonzero(binary_segmentation) + np.count_nonzero(binary_gt_label)
    )

    # two empty masks agree perfectly; returning 1.0 also avoids dividing by
    # zero and matches what iou_metric yields for this case
    if total_pixels == 0:
        return 1.0

    # compute the Dice coefficient
    dice_value = 2 * intersection / total_pixels

    # return it
    return dice_value

## Submission of results


In [None]:
## todo

In [None]:
def clean_metric_array(input_array):
    '''
    Input:
        input_array: input array size m x n containing iou metric values
    Output:
        the same array object, modified in place
    description:
        Every row "claims" the column holding its maximum. When several rows
        claim the same column, only the row with the highest value in that
        column keeps it (the first such row on ties); the entries of all other
        claiming rows in that column are overwritten with -1.
        A demoted row may now have its maximum in another column that is
        already claimed, so calling the function again can resolve further
        conflicts.
    '''

    # nothing to resolve, and argmax would fail on an empty axis
    if input_array.size == 0:
        return input_array

    # column claimed by each row
    claimed_col = np.argmax(input_array, axis=1)
    cols, claim_counts = np.unique(claimed_col, return_counts=True)

    for col in cols[claim_counts > 1]:
        # all rows competing for this column, not just the first two
        rows = np.flatnonzero(claimed_col == col)
        # np.argmax returns the first maximum, so the earliest row wins ties
        winner = rows[np.argmax(input_array[rows, col])]
        input_array[rows[rows != winner], col] = -1
    return input_array

In [None]:


def inference_test(dataset_test, n=None, score_threshold=0.9):
    '''
    Inputs:
        dataset_test: the dataset for which you need to calculate the metric for
        n: number of leading samples to evaluate; None evaluates all of them
        score_threshold: predictions are used in order until the first one
            whose score is not above this value (0.9 reproduces the previous
            hard-coded value)
        # todo: model also as input? (the notebook-level `model` and `device` are used)
    Output:
        mean_dataset_iou: mean IOU metric for entire input dataset
    description:
        For every image the first n_gt predictions (n_gt = number of
        ground-truth instances) are compared with every ground-truth mask.
        The per-image score is the mean over these n_gt rows of the best
        remaining IOU; rows without a usable prediction contribute 0.
        Images without ground-truth instances are skipped.
    '''
    if n is None:
        n = len(dataset_test)
    test_set_iou = []

    # evaluation mode only needs to be set once
    model.eval()

    for i in range(n):
        # fetch the sample once so image and masks come from the same draw
        img, target = dataset_test[i]

        # get the ground-truth mask and convert to numpy
        gt_masks_all = target["masks"].mul(255).byte().cpu().numpy()

        # evaluate the model on the input image
        with torch.no_grad():
            prediction = model([img.to(device)])

        # get the instance mask predictions
        predicted_masks = prediction[0]["masks"]

        # get the score
        scores = prediction[0]["scores"]

        # convert the predicted masks to numpy
        pred_masks_all = predicted_masks.squeeze(1).mul(255).byte().cpu().numpy()

        # separate names so the function argument `n` is not overwritten
        n_pred = pred_masks_all.shape[0]
        n_gt = gt_masks_all.shape[0]

        # no ground truth: there is nothing to match against
        if n_gt == 0:
            continue

        # rows: the first n_gt predictions, columns: ground-truth instances
        iou_array = np.zeros((n_gt, n_gt))

        # the model may return fewer predictions than there are ground-truth
        # instances; the missing rows stay at zero instead of raising an
        # IndexError on `scores[j]`
        for j in range(min(n_pred, n_gt)):
            score = scores[j]
            if not score > score_threshold:
                break
            pred_mask = pred_masks_all[j, :, :]
            for k in range(n_gt):
                gt_mask = gt_masks_all[k, :, :]
                iou_array[j, k] = iou_metric(pred_mask > 127, gt_mask > 127)

        # clean the array to have only one maximum per column; repeated so
        # that conflicts created by an earlier pass are resolved as well
        iou_array = clean_metric_array(clean_metric_array(clean_metric_array(iou_array)))

        # mean iou metric for all the instance for a single input image
        single_test_iou = np.mean(np.max(iou_array, axis=1))
        test_set_iou.append(single_test_iou)

    # mean iou metric for the entire dataset
    mean_dataset_iou = np.mean(np.array(test_set_iou))
    return mean_dataset_iou


In [None]:
# Mean IOU over the whole held-out subset, using the trained notebook-level model.
mean_test_iou = inference_test(dataset_test)
print(mean_test_iou)

0.6507149984288729
