## Instance Segmentation (Mask R-cnn & Pedestrian detection & Instance Segmentation)

In [3]:
import os
import numpy as np
import torch
from PIL import  Image
from torch.utils.data import Dataset, DataLoader

### Dataset
The only specificity that we require is that the dataset __getitem__ should return:  
image: a PIL Image of size (H, W)  
target: a dict containing the following fields  
boxes (FloatTensor[N, 4]): the coordinates of the N bounding boxes in [x0, y0, x1, y1] format, ranging from 0 to W and 0 to H  
labels (Int64Tensor[N]): the label for each bounding box  
image_id (Int64Tensor[1]): an image identifier. It should be unique between all the images in the dataset, and is used during evaluation  
area (Tensor[N]): The area of the bounding box. This is used during evaluation with the COCO metric, to separate the metric scores between small, medium and large boxes.  
iscrowd (UInt8Tensor[N]): instances with iscrowd=True will be ignored during evaluation.  
(optionally) masks (UInt8Tensor[N, H, W]): The segmentation masks for each one of the objects  
(optionally) keypoints (FloatTensor[N, K, 3]): For each one of the N objects, it contains the K keypoints in [x, y, visibility] format, defining the object. visibility=0 means that the keypoint is not visible. Note that for data augmentation, the notion of flipping a keypoint is dependent on the data representation, and you should probably adapt  

In [43]:
class MaskRcnnDataset(Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        # load all iamges and sort
        self.images = sorted(os.listdir(os.path.join(root,"PNGImages")))
        self.masks = sorted(os.listdir(os.path.join(root,"PedMasks")))
    def __getitem__(self,idx):
        img_path = os.path.join(self.root, "PNGImages", self.images[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        # image
        img = Image.open(img_path).convert("RGB")
        img = np.array(img)
        print(type(img), img.dtype)
        img = torch.tensor(img, dtype=torch.uint8)
        mask = Image.open(mask_path)
        mask = np.array(mask)
        # select all ids and sort id
        obj_ids = np.unique(mask)
        # remove id0, which is the background
        obj_ids = obj_ids[1:]
        # split the color-encoded mask into a set
        # of binary masks
        # the output of mask rcnn: binary mask for each instance
        masks = mask == obj_ids[:, None, None]
        num_obj = len(obj_ids)
        boxes = []
        for i in range(num_obj):
            pos = np.where(masks[i])
            xmin = np.min(pos[0])
            xmax = np.max(pos[0])
            ymin = np.min(pos[1])
            ymax = np.max(pos[1])
            boxes.append([xmin,ymin,xmax,ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        #labels there is only one class
        labels = torch.ones((num_obj,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        img_id = torch.tensor([idx])
        
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_obj,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = img_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        '''
        if self.transforms is not None:
            img = self.transforms(img)
        '''
        return img, target

    def __len__(self):
        return len(self.images)

### model

In [7]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
def getmodel(num_classes):
    # load pretrained model
    model = torch.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    
    #get number of input features for classification
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    
    
    #replace the head with new network
    
    
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    return model

### transforms

In [8]:
import torchvision.transforms as T
def minetransforms(istrain):
    transform = []
    transform.append(T.ToTensor())
    #data augmentation
    if(istrain):
        transform.append(T.RandomHorizontalFlip(0.5))
    return transform

### Test

In [45]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
dataset = MaskRcnnDataset('./data/PennFudanPed', minetransforms(istrain=False))
data_loader = torch.utils.data.DataLoader(
 dataset, batch_size=2, shuffle=True, num_workers=4)
images,targets = next(iter(data_loader))
print(images.shape)

<class 'numpy.ndarray'> uint8
<class 'numpy.ndarray'> uint8
<class 'numpy.ndarray'> uint8
<class 'numpy.ndarray'> uint8
<class 'numpy.ndarray'> uint8
<class 'numpy.ndarray'> uint8
<class 'numpy.ndarray'> uint8
<class 'numpy.ndarray'> uint8


TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/mingjun/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/mingjun/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/mingjun/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "<ipython-input-43-3f46414a0519>", line 54, in __getitem__
    img = self.transforms(img)
TypeError: 'list' object is not callable


<class 'numpy.ndarray'> uint8


### train