In [None]:
# Environment setup; this notebook was written to run in Google Colab.

# Presumably required to build pycocotools from source below -- confirm.
!pip install cython
# Install pycocotools from the upstream repository: the version shipped by
# default in Colab has a bug fixed in https://github.com/cocodataset/cocoapi/pull/354
!pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
# Pin matching torch / torchvision builds (the "+cu101" wheels) from the PyTorch wheel index.
!pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html

Collecting git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
  Cloning https://github.com/cocodataset/cocoapi.git to /tmp/pip-req-build-85ecxa2r
  Running command git clone -q https://github.com/cocodataset/cocoapi.git /tmp/pip-req-build-85ecxa2r
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... [?25l[?25hdone
  Created wheel for pycocotools: filename=pycocotools-2.0-cp36-cp36m-linux_x86_64.whl size=266458 sha256=9dfac25e8b86cee1be2cf58146ae773cf50f753622eed2f3057fe75dbe11540c
  Stored in directory: /tmp/pip-ephem-wheel-cache-idqhw00j/wheels/90/51/41/646daf401c3bc408ff10de34ec76587a9b3ebfac8d21ca5c3a
Successfully built pycocotools
Installing collected packages: pycocotools
  Found existing installation: pycocotools 2.0
    Uninstalling pycocotools-2.0:
      Successfully uninstalled pycocotools-2.0
Successfully installed pycocotools-2.0


Looking in links: https://download.pytorch.org/whl/torch_stable.html


In [None]:
import os
import json
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import skimage.io
import cv2
import skimage.draw
from matplotlib import pyplot as plt

class Dataset(torch.utils.data.Dataset):
    """Single-class instance-segmentation dataset read from a VGG-format JSON file.

    The annotations were created with the makesense.ai online annotation tool
    and exported in VGG JSON format. Every annotated region is a polygon given
    by ``all_points_x`` / ``all_points_y``; images without any region are
    skipped.

    Args:
        root: Directory containing both the images and the annotation file.
        json_file: Name of the annotation file inside ``root``.
        transforms: Optional callable ``(img, target) -> (img, target)``.
    """

    def __init__(self, root, json_file, transforms=None):
        self.root = root
        self.transforms = transforms
        self.polygons = []
        self.imgs = []

        # Use a context manager so the file handle is closed deterministically.
        with open(os.path.join(root, json_file)) as fh:
            annotations = json.load(fh)

        # The top-level dict keys are not needed, only the per-image records.
        for record in annotations.values():
            regions = record['regions']
            if not regions:
                continue
            # 'regions' is a dict in some VGG exports and a list in others;
            # accept both.
            if isinstance(regions, dict):
                regions = regions.values()
            self.polygons.append([r['shape_attributes'] for r in regions])
            self.imgs.append(os.path.join(root, record['filename']))

    def __getitem__(self, idx):
        """Return ``(img, target)`` for the sample at ``idx``.

        ``target`` is a dict with the keys ``boxes`` (float32, one
        ``[xmin, ymin, xmax, ymax]`` row per object), ``labels`` (int64, all
        ones because there is a single foreground class), ``masks`` (uint8,
        one ``height x width`` mask per object), ``image_id``, ``area`` and
        ``iscrowd``.
        """
        img = Image.open(self.imgs[idx]).convert("RGB")
        # PIL reports (width, height); no second decode of the file is needed.
        width, height = img.size

        shapes = self.polygons[idx]
        num_objs = len(shapes)

        masks = np.zeros((num_objs, height, width), dtype=bool)
        for i, shape in enumerate(shapes):
            # Rasterise the polygon; ``shape=`` clips vertices that fall
            # outside the image instead of raising an IndexError below.
            rr, cc = skimage.draw.polygon(shape['all_points_y'],
                                          shape['all_points_x'],
                                          shape=(height, width))
            masks[i, rr, cc] = True

        # get bounding box coordinates for each mask
        boxes = []
        for i in range(num_objs):
            rows, cols = np.where(masks[i])
            boxes.append([np.min(cols), np.min(rows), np.max(cols), np.max(rows)])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class so every object gets label 1
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            # torchvision detection models read the boxes from the "boxes" key.
            "boxes": boxes,
            "labels": labels,
            "masks": masks,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of images that have at least one annotated region."""
        return len(self.imgs)

In [None]:
%%shell
# Run only ONCE per runtime.
# Copies helper modules from the TorchVision repo's references/detection
# folder into the parent directory so the notebook can import them
# (engine, utils, transforms are imported in a later cell).
# NOTE(review): the clone step below is commented out, so this cell assumes a
# `vision` checkout already exists in the working directory -- confirm, or
# re-enable the clone on a fresh runtime.
#git clone https://github.com/pytorch/vision.git
cd vision
# NOTE(review): no tag is checked out, so the copied files come from whatever
# revision the existing checkout is on -- verify it matches the installed torchvision.
#git checkout v0.3.0

cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../



In [None]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T


def get_transform(train):
    """Build the (image, target) transform pipeline for the dataset.

    Args:
        train: When true, a random horizontal flip (p=0.5) is added as data
            augmentation; image and ground truth are flipped together.

    Returns:
        A ``T.Compose`` that converts the PIL image into a tensor and, for
        training, applies the flip.

    Note:
        No mean/std normalisation is applied here on purpose. torchvision
        detection models expect images in the 0-1 range and normalise them
        internally, so normalising in the dataset as well would do it twice.
        It would also break code that maps the tensor back to pixels with
        ``img.mul(255)``.
    """
    # converts the image, a PIL image, into a PyTorch Tensor
    steps = [T.ToTensor()]
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)

In [None]:
# Training split: images and labels live in Main/train, with the training
# transform pipeline (get_transform(train=True)).
dataset_train = Dataset(
    root="Main/train",
    json_file="train_label.json",
    transforms=get_transform(train=True),
)
# Validation split: images and labels live in Main/val, with the evaluation
# transform pipeline (get_transform(train=False)).
dataset_val = Dataset(
    root="Main/val",
    json_file="val_label.json",
    transforms=get_transform(train=False),
)

In [None]:
# Forwarded to maskrcnn_resnet50_fpn(trainable_backbone_layers=...) when the
# model is built, and embedded in the checkpoint file name when it is saved.
trainable_backbone_layers = 2

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
   
def get_instance_segmentation_model(num_classes):
    """Return a COCO-pretrained Mask R-CNN with fresh heads for ``num_classes``.

    The box predictor and the mask predictor of the pre-trained network are
    both replaced by new ones sized for ``num_classes`` outputs. The number of
    trainable backbone layers is read from the module-level
    ``trainable_backbone_layers`` setting.
    """
    net = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=True,
        trainable_backbone_layers=trainable_backbone_layers,
    )
    heads = net.roi_heads

    # Box head: keep the classifier's input width, swap in a new predictor.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Mask head: same idea, with a tunable hidden width.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    mask_hidden = 256  # change this and see the results
    heads.mask_predictor = MaskRCNNPredictor(mask_in_channels, mask_hidden, num_classes)

    return net

In [None]:
# Training loader: shuffled mini-batches of two images.
data_loader_train = torch.utils.data.DataLoader(
    dataset=dataset_train,
    batch_size=2,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

# Validation loader: one image at a time, in dataset order.
data_loader_val = torch.utils.data.DataLoader(
    dataset=dataset_val,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Two classes in total: background plus the single annotated class.
num_classes = 2

# Build the model with the helper above and place it on the chosen device.
model = get_instance_segmentation_model(num_classes)
model.to(device)

# SGD over every parameter that is still trainable.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Step schedule: multiply the learning rate by 0.1 every 10 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=10,
    gamma=0.1,
)

cuda


In [None]:
# let's train it for `num_epochs` (20) epochs
num_epochs = 20

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the validation dataset
    evaluate(model, data_loader_val, device=device)

# save the trained weights; the file name records the number of trainable
# backbone layers and the number of epochs
torch.save(model.state_dict(), "{}_layer_trained_model_{}epoch.bin".format(trainable_backbone_layers, num_epochs))



Epoch: [0]  [ 0/45]  eta: 0:02:02  lr: 0.000119  loss: 4.8921 (4.8921)  loss_classifier: 0.6159 (0.6159)  loss_box_reg: 0.4502 (0.4502)  loss_mask: 2.6733 (2.6733)  loss_objectness: 1.0513 (1.0513)  loss_rpn_box_reg: 0.1013 (0.1013)  time: 2.7272  data: 2.0974  max mem: 3174
Epoch: [0]  [10/45]  eta: 0:00:52  lr: 0.001254  loss: 3.3877 (3.5689)  loss_classifier: 0.5542 (0.5346)  loss_box_reg: 0.5319 (0.6036)  loss_mask: 1.5652 (1.6619)  loss_objectness: 0.6827 (0.6729)  loss_rpn_box_reg: 0.0992 (0.0959)  time: 1.4899  data: 0.8223  max mem: 3234
Epoch: [0]  [20/45]  eta: 0:00:28  lr: 0.002389  loss: 2.6035 (2.8270)  loss_classifier: 0.4396 (0.4804)  loss_box_reg: 0.7462 (0.7110)  loss_mask: 0.7996 (1.1116)  loss_objectness: 0.1591 (0.4481)  loss_rpn_box_reg: 0.0695 (0.0759)  time: 1.0791  data: 0.4046  max mem: 3250
Epoch: [0]  [30/45]  eta: 0:00:16  lr: 0.003524  loss: 1.6844 (2.3856)  loss_classifier: 0.3777 (0.4285)  loss_box_reg: 0.7462 (0.7187)  loss_mask: 0.3298 (0.8468)  loss_ob

  "Palette images with Transparency expressed in bytes should be "


Test:  [ 0/16]  eta: 0:00:16  model_time: 0.3765 (0.3765)  evaluator_time: 0.1580 (0.1580)  time: 1.0455  data: 0.5077  max mem: 3363
Test:  [15/16]  eta: 0:00:00  model_time: 0.1645 (0.2074)  evaluator_time: 0.0547 (0.0955)  time: 0.3455  data: 0.0366  max mem: 3363
Test: Total time: 0:00:05 (0.3603 s / it)
Averaged stats: model_time: 0.1645 (0.2074)  evaluator_time: 0.0547 (0.0955)
Accumulating evaluation results...
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.01s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.728
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.987
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.923
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.250
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.677
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.750
 Average Recal

In [None]:
# pick one image from the validation set (index 10); the target is discarded
img, _ = dataset_val[10]
# put the trained model in evaluation mode
model.eval()
# no gradients are needed for inference; the image is moved to the model's
# device and passed as a one-element batch
with torch.no_grad():
    prediction = model([img.to(device)])

In [None]:
# If necessary, load the model from the .bin file and put it in evaluation mode.
model = get_instance_segmentation_model(num_classes)
# map_location makes a checkpoint that was saved from a GPU loadable on a
# CPU-only runtime as well.
model.load_state_dict(torch.load("2_layer_trained_model_20epoch.bin", map_location=device))
model = model.to(device)
model.eval()

In [None]:
import os
path = "Main/test"
# os.listdir returns entries in arbitrary order; sort so that dataset_test[i]
# refers to the same file on every run.
images_test = [os.path.join(path, i) for i in sorted(os.listdir(path))]
dataset_test = []
trans = get_transform(train=False)
for image in images_test:
  # Force 3-channel RGB, exactly as the Dataset class does, so that palette,
  # grayscale or RGBA files do not reach the model with the wrong channel count.
  img = Image.open(image).convert("RGB")
  # The transform pipeline works on (image, target) pairs; there is no target here.
  dataset_test.append(trans(img, None))

In [None]:
# pick one image from the test set (the first one); entries are
# (image, target) pairs and the target is None for test images
img, _ = dataset_test[0]
# put the model in evaluation mode
model.eval()
# no gradients are needed for inference; the image is moved to the model's
# device and passed as a one-element batch
with torch.no_grad():
    test_p = model([img.to(device)])

In [None]:
# Show every predicted mask of the test image in a grid with `cols` columns.
cols = 8
num_masks = len(test_p[0]['masks'])
# Ceiling division: `n // cols + 1` would add an empty row whenever the mask
# count is an exact multiple of `cols`. Keep at least one row for zero masks.
rows = max(1, -(-num_masks // cols))
plt.figure(figsize=(14, 14 * rows // cols))

for i in range(num_masks):
    plt.subplot(rows, cols, i+1)
    plt.axis('off')
    # Scale the mask values by 255 and convert to bytes for display.
    plt.imshow(test_p[0]['masks'][i,0].mul(255).byte().cpu().numpy())

plt.show()

In [None]:
# To use the visualization code of matterport, convert the results into the required format
def dataManipulation(img, prediction):
  """Convert an image tensor and a model prediction into NumPy arrays.

  Args:
    img: Image tensor laid out channels-first; its values are scaled by 255
      and cast to bytes, so they are expected to lie in the 0-1 range.
    prediction: Model output list; only the first element is used and its
      'masks', 'boxes' and 'labels' entries are read.

  Returns:
    Tuple ``(im, rois, masks, class_ids, class_names)`` where ``im`` is the
    channels-last uint8 image, ``rois`` the predicted boxes, ``masks`` a
    float64 array of shape (height, width, num_masks) with values rounded to
    0/1, ``class_ids`` the predicted labels and ``class_names`` the id -> name
    mapping.
  """
  result = prediction[0]
  # detach().cpu() keeps this working when the image lives on the GPU.
  im = img.detach().cpu().mul(255).permute(1, 2, 0).byte().numpy()

  mask_scores = result['masks'].detach().cpu().numpy()
  m, _, h, w = mask_scores.shape
  # Pre-allocate (h, w, m) float64 and fill it in one vectorised assignment:
  # round every score and move the mask axis last.
  masks = np.zeros((h, w, m))
  masks[...] = np.around(mask_scores[:, 0]).transpose(1, 2, 0)

  rois = result['boxes'].detach().cpu().numpy()
  class_names = {0: "BG", 1:"box"}
  class_ids = result['labels'].detach().cpu().numpy()

  return im, rois, masks, class_ids, class_names

In [None]:
import visualize

# Convert the test prediction into NumPy arrays and draw the detected
# instances on the image, without bounding boxes.
# NOTE(review): `visualize` is a project-local module that is not part of this
# notebook (the comment on dataManipulation refers to matterport's
# visualization code) -- confirm it is available on the import path.
im, rois, masks, class_ids, class_names = dataManipulation(img, test_p)
visualize.display_instances(im, rois, masks, class_ids, class_names, figsize=(5,5), show_bbox=False)

In [None]:
# Show each predicted instance as an RGBA image whose alpha channel is
# derived from the instance mask.
# NOTE(review): get_segment_crop is not defined anywhere in this notebook --
# it must be defined or imported before this cell can run.
cols = 8
num_masks = masks.shape[-1]
# Ceiling division: `n // cols + 1` would add an empty row whenever the mask
# count is an exact multiple of `cols`. Keep at least one row for zero masks.
rows = max(1, -(-num_masks // cols))
plt.figure(figsize=(14, 14 * rows // cols))

for i in range(num_masks):
    plt.subplot(rows, cols, i+1)
    a = get_segment_crop(im, mask=masks[:,:,i])
    rgba = cv2.cvtColor(a, cv2.COLOR_RGB2RGBA)
    # The mask, scaled to 0/255 and passed through the same helper, becomes the alpha channel.
    rgba[:, :, 3] = get_segment_crop(masks[:,:,i]*255, mask=masks[:,:,i])
    plt.axis('off')
    plt.imshow(rgba)

plt.show()

In [None]:
def Contour2Quadrangle(contour):
  """Approximate a contour by a quadrangle that starts at its top-left vertex.

  cv2.approxPolyDP approximates a contour by a polygon with as few vertices as
  possible under the condition that the residual of the approximation stays
  below a threshold (epsilon). The threshold is searched dynamically so that
  the polygon has exactly four vertices (approach implemented by
  Miyazawa-San): epsilon = alpha * perimeter, alpha is first halved or doubled
  until the vertex count crosses four, then refined by bisection on the
  geometric mean.

  Args:
    contour: A contour as accepted by cv2.arcLength / cv2.approxPolyDP.

  Returns:
    A list with the four vertices, in the order produced by cv2.approxPolyDP
    and rotated so that the top-left vertex comes first.

  Raises:
    ValueError: If no threshold yielding exactly four vertices is found within
      the iteration budget (for example for contours made of collinear
      points). Without the budget the search would loop forever.
  """
  # Upper bound for every search loop. After this many halvings/doublings or
  # bisection steps alpha no longer changes meaningfully in floating point.
  max_steps = 64

  def getApprox(contour, alpha):
    # The threshold is expressed as a fraction of the contour's perimeter.
    epsilon = alpha * cv2.arcLength(contour, True)
    return cv2.approxPolyDP(contour, epsilon, True)

  def fail():
    raise ValueError("could not approximate the contour by a quadrangle")

  # find appropriate epsilon
  def getQuadrangle(contour):
    alpha = 0.1
    beta = 2 # larger than 1
    approx = getApprox(contour, alpha)
    if len(approx) == 4:
      return approx

    steps = 0
    if len(approx) < 4:
      # Too coarse: shrink the threshold until at least four vertices remain.
      while len(approx) < 4:
        if steps >= max_steps:
          fail()
        alpha = alpha / beta
        approx = getApprox(contour, alpha)
        steps += 1
      alpha_lower = alpha
      alpha_upper = alpha * beta
    else:
      # Too fine: grow the threshold until at most four vertices remain.
      while len(approx) > 4:
        if steps >= max_steps:
          fail()
        alpha = alpha * beta
        approx = getApprox(contour, alpha)
        steps += 1
      alpha_lower = alpha / beta
      alpha_upper = alpha
    if len(approx) == 4:
      return approx

    # alpha_lower yields more than four vertices and alpha_upper fewer:
    # bisect on the geometric mean until exactly four are produced.
    for _ in range(max_steps):
      alpha_middle = ( alpha_lower * alpha_upper ) ** 0.5
      approx_middle = getApprox(contour, alpha_middle)
      if len(approx_middle) == 4:
        return approx_middle
      if len(approx_middle) < 4:
        alpha_upper = alpha_middle
      else:
        alpha_lower = alpha_middle
    fail()

  def getQuadrangleWithRegularOrder(contour):
    approx = getQuadrangle(contour)
    hashable_approx = [tuple(a[0]) for a in approx]
    sorted_by_axis0 = sorted(hashable_approx, key=lambda x: x[0])
    sorted_by_axis1 = sorted(hashable_approx, key=lambda x: x[1])
    # Top-left candidates: among the two left-most AND the two top-most points.
    candidates = set(sorted_by_axis0[:2]) & set(sorted_by_axis1[:2])
    if not candidates:
      # No point satisfies both; fall back to the two left-most points.
      candidates = set(sorted_by_axis0[:2])
    # Deterministic tie-break (set.pop() would pick an arbitrary element):
    # the point with the smallest x + y, then the smallest x.
    topleft = min(candidates, key=lambda p: (p[0] + p[1], p[0]))
    topleft_idx = hashable_approx.index(topleft)
    approx_with_regular_order = [ approx[(topleft_idx + i) % 4] for i in range(4) ]
    return approx_with_regular_order

  return getQuadrangleWithRegularOrder(contour)

In [None]:
# Size of the rectified output image for every instance.
rect_img_w = 600
rect_img_h = 300

# Target corners in the order top-left, bottom-left, bottom-right, top-right.
# The same for every instance, so built once outside the loop.
dst = np.float32([[0,0],[0, rect_img_h],[rect_img_w, rect_img_h],[rect_img_w, 0]])

all_plots = [im]
for i in range(masks.shape[-1]):
  # findContours returns (contours, hierarchy) in OpenCV 4 but
  # (image, contours, hierarchy) in OpenCV 3; [-2] is the contour list in both.
  contours = cv2.findContours(masks[:,:,i].astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
  if len(contours) == 0:
    # Empty mask: there is nothing to rectify (max() would raise on an empty list).
    continue
  # Use the largest contour of the mask.
  contour = max(contours, key=cv2.contourArea)
  corner_points = Contour2Quadrangle(contour)
  src = np.float32([point[0] for point in corner_points])

  # Map the quadrangle onto the target rectangle.
  M = cv2.getPerspectiveTransform(src, dst)
  transformed = cv2.warpPerspective(im, M, (rect_img_w, rect_img_h))
  all_plots.append(transformed)

visualize.display_images(all_plots, cols=8)

In [None]:
# Apply this function after prediction to remove overlapping boxes.
# Not yet tried within this notebook's pipeline.
def non_max_suppression(boxes, probs=None, overlapThresh=0.3):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Args:
        boxes: Array-like of shape (N, 4) with rows ``[x1, y1, x2, y2]``.
        probs: Optional array-like of N scores. When given, boxes are visited
            from the highest score down; otherwise from the largest ``y2`` down.
        overlapThresh: A remaining box is dropped when the intersection with
            the picked box, divided by the remaining box's own area, exceeds
            this value.

    Returns:
        The kept boxes as an integer array (in pick order), or an empty list
        when ``boxes`` is empty.
    """
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # Accept plain lists, and convert every non-float dtype to float. Unsigned
    # integers in particular would wrap around in the subtractions below and
    # produce bogus overlaps.
    boxes = np.asarray(boxes)
    if boxes.dtype.kind != "f":
        boxes = boxes.astype("float")

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # compute the area of the bounding boxes (inclusive pixel coordinates)
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    # sort on the probabilities when provided, otherwise on the bottom-right
    # y-coordinate
    idxs = np.argsort(y2 if probs is None else probs)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # grab the last index in the indexes list and add the index value
        # to the list of picked indexes
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]

        # intersection of the picked box with every remaining box: largest
        # start coordinates and smallest end coordinates
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # width and height of the intersection (zero when disjoint)
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # compute the ratio of overlap
        overlap = (w * h) / area[rest]

        # delete the picked index and all indexes whose overlap is greater
        # than the provided overlap threshold
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))

    # return only the bounding boxes that were picked
    return boxes[pick].astype("int")