# Plane Segmenter

In [None]:
# Environment setup for the notebook runtime: install a pinned PyYAML, then make detectron2 importable.
!python -m pip install pyyaml==5.1
import sys, os, distutils.core
# Fast install
# Clone the repository, install only the dependencies listed in its setup.py
# (`install_requires`), and put the checkout on sys.path instead of building the package.
!git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))

# Full install
# Alternative kept for reference: let pip install detectron2 straight from the git repository.
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
import json, cv2, csv, random, datetime
import pandas as pd
import numpy as np
from google.colab.patches import cv2_imshow
from sklearn.metrics import jaccard_score
from PIL import Image, ImageDraw
from tqdm.notebook import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import detectron2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.structures import BoxMode
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.data import build_detection_test_loader, MetadataCatalog, DatasetCatalog
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.utils.logger import setup_logger
setup_logger()

In [None]:
# Mount Google Drive at /content/drive; the data and output paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Root folder of the project on the mounted Drive (note: the path contains a space).
BASE_DIR = '/content/drive/My Drive/data'
# Folder for artefacts written by this notebook; created up front so later writes do not fail.
OUTPUT_DIR = '{}/output'.format(BASE_DIR)
os.makedirs(OUTPUT_DIR, exist_ok=True)

## Object Detection

In [None]:
def get_detection_data(set_name):
  """Build the list of detectron2-style dataset records for one split.

  "train" and "val" are both carved out of the ``train`` image folder: the
  first 17% of the sorted file list is "val", the remainder is "train", and
  both are annotated from ``train.json``.  Any other ``set_name`` is read from
  the folder of the same name and yields records with empty annotation lists.

  Args:
    set_name: "train", "val", or the name of another image folder under
      ``<BASE_DIR>/data`` (e.g. "test").

  Returns:
    A list of dicts with the keys "file_name", "image_id", "height", "width"
    and "annotations".  Each annotation carries "bbox" (declared as
    ``BoxMode.XYWH_ABS``), "bbox_mode", "segmentation", "category_id" (always
    0) and "iscrowd" (always 0).
  """
  data_dirs = '{}/data'.format(BASE_DIR)
  is_annotated = set_name in ["train", "val"]
  image_dir = os.path.join(data_dirs, "train" if is_annotated else set_name)

  # Group the annotations by file name once, instead of rescanning the whole
  # annotation list for every image.  Per-file annotation order is preserved.
  anns_by_file = {}
  if is_annotated:
    with open(os.path.join(data_dirs, "train.json")) as f:
      for ann in json.load(f):
        anns_by_file.setdefault(ann["file_name"], []).append(ann)

  # os.listdir() returns entries in arbitrary order; sorting keeps the
  # train/val split reproducible and guarantees the two splits stay disjoint.
  set_list = sorted(os.listdir(image_dir))
  split_at = int(0.17 * len(set_list))
  if set_name == "val":
    set_list = set_list[:split_at]
  elif set_name == "train":
    set_list = set_list[split_at:]

  dataset = []
  counter = 0
  for filename in set_list:
    path = os.path.join(image_dir, filename)
    if not os.path.isfile(path) or filename == "desktop.ini":
      continue
    image = cv2.imread(path)
    if image is None:
      # cv2.imread returns None for files it cannot decode; skip them like
      # the other non-image entries instead of crashing on `.shape`.
      continue
    height, width = image.shape[:2]
    record = {
      "file_name": path,
      # Running index; replaced by the annotation's own id when one exists.
      "image_id": counter,
      "height": height,
      "width": width,
      "annotations": []
    }
    counter += 1
    for ann in anns_by_file.get(filename, []):
      record["image_id"] = ann["image_id"]
      record["annotations"].append({
        "bbox": ann["bbox"],
        "bbox_mode": BoxMode.XYWH_ABS,
        "segmentation": ann["segmentation"],
        "category_id": 0,
        "iscrowd": 0
      })
    dataset.append(record)

  return dataset

In [None]:
# (Re-)register the three splits.  Removing a name that was never registered
# raises, so on a fresh runtime only drop entries that actually exist; on a
# re-run the stale entry is replaced.
for d in ["train", "test", "val"]:
  dataset_name = "data_detection_" + d
  if dataset_name in DatasetCatalog.list():
    DatasetCatalog.remove(dataset_name)
  # `d=d` binds the current split name; a plain closure would see the last value of `d`.
  DatasetCatalog.register(dataset_name, lambda d=d: get_detection_data(d))
  MetadataCatalog.get(dataset_name).set(thing_classes=["planes"])

In [None]:
# Sanity check: draw the ground-truth annotations of a few random training images.
dataset_dicts = get_detection_data("train")
data_detection_metadata = MetadataCatalog.get("data_detection_train")
# random.sample raises ValueError when asked for more items than the list
# holds, so cap the sample size at the number of available records.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
  img = cv2.imread(d["file_name"])
  # The image is read with cv2 and channel-reversed for the Visualizer, then reversed back for display.
  visualizer = Visualizer(img[:, :, ::-1], metadata=data_detection_metadata, scale=0.2)
  out = visualizer.draw_dataset_dict(d)
  cv2_imshow(out.get_image()[:, :, ::-1])

In [None]:
# Configuration of the Faster R-CNN plane detector.
cfg = get_cfg()
cfg.OUTPUT_DIR = "{}/output/".format(BASE_DIR)
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"))
# Start from the matching model-zoo checkpoint, as the Mask R-CNN cell further
# down does for its own config; without this line only the WEIGHTS value that
# comes with the merged config file is used.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml")
cfg.DATASETS.TRAIN = ("data_detection_train",)
cfg.DATASETS.TEST = ("data_detection_test",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 500
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 768
# Single foreground class ("planes").
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

In [None]:
# Tensorboard:
%load_ext tensorboard
%tensorboard --logdir logs

In [None]:
# Train the detector with the configuration built above.
trainer = DefaultTrainer(cfg)
# resume=False: do not continue from a previous checkpoint in cfg.OUTPUT_DIR.
trainer.resume_or_load(resume=False)
trainer.train()

In [None]:
# Switch the config to inference: load the weights file expected in cfg.OUTPUT_DIR
# and set the test-time score threshold to 0.6 before building the predictor.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.6
predictor = DefaultPredictor(cfg)

In [None]:
# Qualitative check: draw the detector's predictions on a few random test images.
dataset_dicts = get_detection_data("test")
# random.sample raises ValueError when asked for more items than the list
# holds, so cap the sample size at the number of available records.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
  im = cv2.imread(d["file_name"])
  outputs = predictor(im)
  v = Visualizer(im[:, :, ::-1], metadata=data_detection_metadata, scale=0.2, instance_mode=ColorMode.IMAGE_BW)
  out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
  cv2_imshow(out.get_image()[:, :, ::-1])

In [None]:
# COCO-style evaluation of the detector on the validation split.
# NOTE(review): `predictor` was built after SCORE_THRESH_TEST was set to 0.6, so
# presumably only detections above that score reach the evaluator — confirm this is intended.
evaluator = COCOEvaluator("data_detection_val", cfg, False, output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, "data_detection_val")
print(inference_on_dataset(predictor.model, val_loader, evaluator))

## Semantic Segmentation

In [None]:
def get_instance_sample(data, idx, img=None):
  """Crop one annotated instance and its mask out of an image.

  Args:
    data: dataset record with "file_name", "height", "width" and "annotations".
    idx: index of the annotation inside ``data["annotations"]``.
    img: optional, already loaded image array.  When ``None`` the image is
      read from ``data["file_name"]``.

  Returns:
    ``(obj_img, obj_mask)``: the image crop and the mask crop covering the
    annotation's bounding box (read as x, y, width, height and truncated to ints).
  """
  # `img == None` compares element-wise when `img` is an array, which makes the
  # `if` raise "truth value of an array is ambiguous"; test identity instead.
  if img is None:
    img = cv2.imread(data["file_name"])
  ann = data["annotations"][idx]
  left, top, box_w, box_h = [int(a) for a in ann["bbox"]]
  rows = slice(top, top + box_h)
  cols = slice(left, left + box_w)
  obj_img = img[rows, cols]

  # Rasterise the annotation's segmentation at full image size, then cut out the same window.
  full_mask = detectron2.utils.visualizer.GenericMask(ann["segmentation"], data["height"], data["width"]).mask
  obj_mask = full_mask[rows, cols]

  return obj_img, obj_mask

In [None]:
class PlaneDataset(Dataset):
  """Dataset of single-instance crops: one item per annotation of every image.

  Each item is ``(img, mask)`` where ``img`` is a ``(128, 128, 3)`` uint8
  tensor holding the resized crop and ``mask`` is the matching ``(128, 128)``
  float tensor.
  """

  def __init__(self, set_name, data_list):
    """Index every (image, annotation) pair of ``data_list``.

    Args:
      set_name: name of the split (stored only).
      data_list: list of dataset records, each with an "annotations" list.
    """
    self.transforms = transforms.Compose([transforms.ToTensor(),])
    self.set_name = set_name
    self.data = data_list
    # Flat index: item k -> [record index, annotation index].
    self.instance_map = []
    for i, d in enumerate(self.data):
      for j in range(len(d['annotations'])):
        self.instance_map.append([i,j])

  def __len__(self):
    """Return the total number of annotated instances."""
    return len(self.instance_map)

  def numpy_to_tensor(self, img, mask):
    """Convert an image/mask pair to float tensors, applying ``self.transforms`` to the image if set."""
    if self.transforms is not None:
      img = self.transforms(img)
    img = torch.tensor(img, dtype=torch.float)
    mask = torch.tensor(mask, dtype=torch.float)
    return img, mask

  @staticmethod
  def _resize_mask(mask, size):
    """Rescale a 2-D mask array to ``(size, size)`` and return it as a float tensor.

    Uses the same default (nearest) interpolation as the image resize in
    ``__getitem__`` so that image and mask stay pixel-aligned.
    """
    as_batch = torch.from_numpy(np.ascontiguousarray(mask)).float()[None, None]
    return torch.nn.functional.interpolate(as_batch, (size, size))[0, 0]

  def __getitem__(self, idx):
    """Return the ``idx``-th instance as ``(img, mask)`` resized to 128x128."""
    fixed_size = 128
    if torch.is_tensor(idx):
      idx = idx.tolist()
    idx = self.instance_map[idx]
    data = self.data[idx[0]]
    img, mask = get_instance_sample(data, idx[1])
    # HWC -> 1xCxHxW for interpolate, then back to HWC uint8.
    img = torch.nn.functional.interpolate(torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0).float(), (fixed_size, fixed_size))[0].permute(1, 2, 0).byte()
    # np.resize only repeats/truncates the flattened data to fill the new
    # shape; it does not rescale, so the mask has to be interpolated instead.
    mask = self._resize_mask(mask, fixed_size)
    return img, mask

def get_plane_dataset(set_name='train', batch_size=2):
  """Create the instance-crop dataset for a registered split plus a shuffling loader.

  Args:
    set_name: split suffix of the registered "data_detection_<set_name>" dataset.
    batch_size: number of crops per batch.

  Returns:
    ``(loader, dataset)``.
  """
  catalog_key = "data_detection_{}".format(set_name)
  plane_dataset = PlaneDataset(set_name, DatasetCatalog.get(catalog_key))
  plane_loader = DataLoader(
    plane_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
  )
  return plane_loader, plane_dataset

In [None]:
class conv(nn.Module):
  """3x3 convolution with padding 1, optionally followed by BatchNorm and ReLU."""

  def __init__(self, in_ch, out_ch, activation=True):
    """Build the block; ``activation=False`` keeps only the bare convolution."""
    super().__init__()
    stages = [nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1)]
    if activation:
      stages.append(nn.BatchNorm2d(out_ch))
      stages.append(nn.ReLU(inplace=True))
    self.layer = nn.Sequential(*stages)

  def forward(self, x):
    """Apply the block to ``x``."""
    features = self.layer(x)
    return features

class down(nn.Module):
  """Encoder stage: a ``conv`` block followed by 2x2 max pooling."""

  def __init__(self, in_ch, out_ch):
    """Build the stage mapping ``in_ch`` to ``out_ch`` channels."""
    super().__init__()
    extract = conv(in_ch, out_ch)
    shrink = nn.MaxPool2d(kernel_size=2)
    self.layer = nn.Sequential(extract, shrink)

  def forward(self, x):
    """Apply convolution and pooling to ``x``."""
    pooled = self.layer(x)
    return pooled

class up(nn.Module):
  """Decoder stage: 2x upsampling followed by a ``conv`` block."""

  def __init__(self, in_ch, out_ch, bilinear=False):
    """Build the stage.

    ``bilinear=True`` upsamples with ``nn.Upsample``; otherwise a stride-2
    transposed convolution that keeps ``in_ch`` channels is used.
    """
    super().__init__()
    self.up = (
      nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
      if bilinear
      else nn.ConvTranspose2d(in_ch, in_ch, kernel_size=2, stride=2)
    )
    self.conv = conv(in_ch, out_ch)

  def forward(self, x):
    """Upsample ``x`` and then convolve it."""
    enlarged = self.up(x)
    return self.conv(enlarged)

class MyModel(nn.Module):
  """Encoder/decoder network with additive skip connections producing a 1-channel logit map."""

  def __init__(self):
    """Create encoder, decoder, output head and the per-skip normalisation layers."""
    super().__init__()

    widths = [8, 16, 32, 64, 128, 256, 512]

    # Stem at full resolution, then one `down` stage per consecutive width pair.
    encoder_stages = [conv(3, widths[0])]
    for narrow, wide in zip(widths[:-1], widths[1:]):
      encoder_stages.append(down(narrow, wide))
    self.encoder = nn.ModuleList(encoder_stages)

    # Mirror image of the encoder, finished by a conv back to 3 channels.
    decoder_stages = []
    for wide, narrow in zip(widths[:0:-1], widths[-2::-1]):
      decoder_stages.append(up(wide, narrow))
    decoder_stages.append(conv(widths[0], 3))
    self.decoder = nn.ModuleList(decoder_stages)

    self.output_conv = conv(3, 1, False)

    # One BatchNorm per skip connection, ordered from the narrowest width up.
    self.norm_layers = nn.ModuleList([nn.BatchNorm2d(width) for width in widths[:-1]])

  def forward(self, x):
    """Run the network on ``x`` and return the output of the final bare convolution."""
    skips = []
    for stage in self.encoder:
      x = stage(x)
      skips.append(x)

    up_stages = list(self.decoder)[:-1]
    # Deepest skip first; the bottleneck output itself (last entry) is not a skip.
    skip_feats = skips[-2::-1]
    norms = list(self.norm_layers)[::-1]
    for stage, skip, norm in zip(up_stages, skip_feats, norms):
      merged = stage(x) + skip
      x = norm(F.relu(merged))

    head = self.decoder[-1]
    return self.output_conv(head(x))

In [None]:
# Train the crop-level segmentation network with SGD and BCE-with-logits.
num_epochs = 50
batch_size = 32
learning_rate = 1e-1
weight_decay = 1e-5

model = MyModel()
model = model.cuda()
model.train()
loader, _ = get_plane_dataset('train', batch_size)
crit = nn.BCEWithLogitsLoss()
optim = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

i = 1
for epoch in range(num_epochs):
  total_loss = 0
  # Divide the learning rate by 10 at epochs 30 and 45 (1-based counter `i`).
  if i % 15 == 0 and i > 16:
    for param_group in optim.param_groups:
      param_group['lr'] /=10
  for (img, mask) in tqdm(loader):
    # The dataset yields uint8 HWC crops.  Re-wrapping them with
    # torch.tensor(..., requires_grad=True) raises for integer dtypes, and
    # inputs need no gradient anyway.  Convert to float NCHW in [0, 1], the
    # same scaling ToTensor() applies to the crops in get_prediction_mask.
    img = img.cuda().permute(0,3,1,2).float() / 255
    # Add the channel dimension so the target matches the model's 1-channel output.
    mask = torch.unsqueeze(mask.cuda(), 1)
    pred = model(img)
    loss = crit(pred, mask)
    optim.zero_grad()
    loss.backward()
    optim.step()
    # .item() accumulates a plain Python float instead of a tensor.
    total_loss += loss.item()
  print("Epoch: {}, Loss: {}".format(epoch, total_loss/len(loader)))
  i += 1

torch.save(model.state_dict(), '{}/output/final_segmentation_model.pth'.format(BASE_DIR))

In [None]:
# Reload the trained segmentation network in eval mode and rebuild the loader for evaluation.
batch_size = 1
model = MyModel().cuda()
model.load_state_dict(torch.load('{}/output/final_segmentation_model.pth'.format(BASE_DIR)))
model = model.eval()
loader, dataset = get_plane_dataset('train', batch_size)

def sigmoid(x):
  """Element-wise logistic function for NumPy arrays or tensors; returns a NumPy array.

  The later cells call ``sigmoid`` on NumPy arrays, which an ``nn.Sigmoid``
  module rejects with a TypeError, so the input is converted explicitly.
  """
  return torch.sigmoid(torch.as_tensor(x).detach().cpu()).numpy()

def iou(true, pred):
  """Intersection-over-union between a ground-truth mask and predicted logits.

  Args:
    true: array-like ground-truth mask; values above 0.5 count as foreground.
    pred: array-like raw logits of the same shape; values above 0 count as
      foreground (equivalent to sigmoid(pred) > 0.5).

  Returns:
    The IoU as a float, or 0 when both masks are empty.
  """
  # Threshold directly: the ground truth needs no sigmoid, and comparing the
  # logits with 0 avoids calling a torch module on NumPy arrays.
  true_fg = np.asarray(true) > 0.5
  pred_fg = np.asarray(pred) > 0
  union = np.count_nonzero(true_fg | pred_fg)
  if union == 0:
    return 0
  return np.count_nonzero(true_fg & pred_fg) / union

# Mean IoU of the segmentation network over every crop served by `loader`.
total_iou = 0
counter = 0
with torch.no_grad():
  for (img, mask) in tqdm(loader):
    # uint8 HWC batch -> float NCHW in [0, 1]; the convolutions need float input.
    img = img.permute(0,3,1,2).float().div(255).cuda()
    pred = model(img).cpu()
    for i in range(img.shape[0]):
      # mask[i] is the whole (H, W) target: indexing it with [0] again would
      # keep only its first row.  pred[i, 0] drops the channel dimension.
      total_iou += iou(mask[i].numpy(), pred[i, 0].numpy())
      counter += 1

# max(..., 1) keeps the report from dividing by zero when the loader is empty.
print("\n #images: {}, Mean IoU: {}".format(counter, total_iou/max(counter, 1)))

In [None]:
# Qualitative check: show the crop, the predicted mask and the target mask for one batch.
loader, dataset = get_plane_dataset('train', batch_size)

img, mask = next(iter(loader))
with torch.no_grad():
  # uint8 HWC batch -> float NCHW in [0, 1] before it is passed to the model.
  pred = model(img.permute(0,3,1,2).float().div(255).cuda()).cpu()
for i in range(img.shape[0]):
  # The batch items are already HWC uint8 crops, so they can be shown as they are.
  cv2_imshow(img[i].numpy())
  # logit > 0 is the same decision as sigmoid(logit) > 0.5.
  cv2_imshow((pred[i, 0] > 0).numpy().astype(np.uint8) * 255)
  # mask[i] is the full (H, W) target; indexing [0] again would keep only its first row.
  cv2_imshow(mask[i].numpy() * 255)

## Instance Segmentation

In [None]:
def get_prediction_mask(data):
  """Build instance-label maps (ground truth and predicted) for one image.

  For every box, the crop is resized to 128x128, segmented by the global
  ``model`` and the resulting mask is pasted back with label ``index + 1``.
  Boxes come from the record's annotations when it has any; otherwise they
  are the detections of the global ``predictor``.

  Args:
    data: dataset record with "file_name", "height", "width" and, optionally,
      "annotations".

  Returns:
    ``(img, true_mask, pred_mask)``: the image as read by cv2 and two
    ``(height, width)`` CUDA tensors whose pixels hold 0 for background or a
    1-based instance label.
  """
  img = cv2.imread(data['file_name'])
  height, width = data['height'], data['width']
  pred_masks = np.zeros([height, width])
  annotations = data.get('annotations') or []

  def process_bbox(bbox, i):
    # bbox = (left, top, right, bottom) in pixels; clip it to the image.
    left, top, right, bottom = [int(v) for v in bbox]
    left, top = max(left, 0), max(top, 0)
    right, bottom = min(right, width), min(bottom, height)
    if right <= left or bottom <= top:
      return  # degenerate box: nothing to segment
    crop = cv2.resize(img[top:bottom, left:right], (128, 128), interpolation=cv2.INTER_AREA)
    with torch.no_grad():
      logits = model(torch.unsqueeze(transforms.ToTensor()(crop).to('cuda'), 0)).cpu().numpy()[0, 0]
    # cv2.resize takes the target size as (width, height).
    logits = cv2.resize(logits, (right - left, bottom - top), interpolation=cv2.INTER_AREA)
    region = pred_masks[top:bottom, left:right]  # view: writes go to pred_masks
    # Label foreground pixels (logit > 0, i.e. sigmoid > 0.5) that are still
    # free.  Pixels claimed by an earlier instance keep their label, and
    # background pixels of this box never erase earlier instances.
    region[(logits > 0) & (region == 0)] = i + 1

  if annotations:
    for i, annotation in enumerate(annotations):
      # Annotation boxes are (x, y, width, height); convert them to corners.
      x, y, w, h = [int(v) for v in annotation['bbox']]
      process_bbox([x, y, x + w, y + h], i)
  else:
    # Every record carries an "annotations" key (empty for unlabelled data),
    # so test emptiness, not key presence, to decide on using the detector.
    boxes = predictor(img)['instances'].pred_boxes.tensor.cpu().numpy()
    for i, box in enumerate(np.floor(boxes)):
      process_bbox(box, i)

  true_mask = np.zeros([height, width])
  for j, annotation in enumerate(annotations):
    x, y, w, h = [int(v) for v in annotation['bbox']]
    rows, cols = slice(y, y + h), slice(x, x + w)
    local_true_mask = detectron2.utils.visualizer.GenericMask(annotation['segmentation'], height, width).mask
    # Where instances overlap, the higher label wins.
    true_mask[rows, cols] = np.maximum(true_mask[rows, cols], local_true_mask[rows, cols] * (j + 1))

  return img, torch.tensor(true_mask, device=torch.device('cuda')), torch.tensor(pred_masks, device=torch.device('cuda'))

In [None]:
# Qualitative check: show image, ground-truth labels and predicted labels for three random records.
dataset = get_detection_data('train')
# Draw indices from the first 50 records, or from all of them when fewer exist.
for i in np.random.randint(0, min(50, len(dataset)), 3):
  img, true_mask, pred_mask = get_prediction_mask(dataset[i])
  # cv2 works on NumPy arrays, not on CUDA tensors.
  true_mask = true_mask.cpu().numpy()
  pred_mask = pred_mask.cpu().numpy()
  # Stretch the labels to 0..255 for display; an empty mask stays all zero
  # instead of being divided by zero.
  if pred_mask.max() > 0:
    pred_mask = pred_mask * (255. / pred_mask.max())
  if true_mask.max() > 0:
    true_mask = true_mask * (255. / true_mask.max())
  cv2_imshow(cv2.resize(img, (img.shape[1]//3, img.shape[0]//3), interpolation = cv2.INTER_AREA))
  cv2_imshow(cv2.resize(true_mask, (true_mask.shape[1]//3, true_mask.shape[0]//3), interpolation = cv2.INTER_AREA))
  cv2_imshow(cv2.resize(pred_mask, (pred_mask.shape[1]//3, pred_mask.shape[0]//3), interpolation = cv2.INTER_AREA))

In [None]:
def rle_encoding(x):
  """Run-length encode the pixels of ``x`` that equal 1.

  The tensor is flattened in row-major order and positions are 0-based.

  Args:
    x: tensor on any device (bool, integer or float); it is cast with
      ``.long()`` and only entries equal to 1 are encoded.

  Returns:
    A string "start length start length ..." with one pair per run, or an
    empty list when ``x`` contains no 1.
  """
  dots = torch.where(torch.flatten(x.long()) == 1)[0]
  if len(dots) == 0:
    return []
  # Positions inside `dots` at which a new run begins (gap to the previous pixel).
  breaks = torch.where(dots[1:] != dots[:-1] + 1)[0] + 1
  # new_tensor keeps the helper tensors on the device of the input instead of
  # hard-coding CUDA, so CPU masks work as well.
  start_idx = torch.cat((dots.new_tensor([0]), breaks))
  run_starts = dots[start_idx]
  bounds = torch.cat((start_idx, dots.new_tensor([len(dots)])))
  run_lengths = bounds[1:] - bounds[:-1]
  # Interleave as start0, length0, start1, length1, ...
  runs = torch.stack((run_starts, run_lengths), dim=1).flatten().cpu().tolist()
  return ' '.join(str(value) for value in runs)

In [None]:
# Collect one run-length-encoded row per predicted instance and write them to pred.csv.
preddic = {"ImageId": [], "EncodedPixels": []}

def _append_split_predictions(set_name):
  """Append the RLE rows of every image of the registered split ``set_name`` to ``preddic``."""
  records = DatasetCatalog.get("data_detection_{}".format(set_name))
  for sample in tqdm(records, position=0, leave=True):
    # File name without directory and extension, whatever the extension's length.
    image_id = os.path.splitext(os.path.basename(sample['file_name']))[0]
    _, _, pred_mask = get_prediction_mask(sample)
    labels = [label for label in torch.unique(pred_mask) if label != 0]
    if not labels:
      # No instance found: keep the image in the file with an empty encoding.
      preddic['ImageId'].append(image_id)
      preddic['EncodedPixels'].append([])
      continue
    for label in labels:
      preddic['ImageId'].append(image_id)
      preddic['EncodedPixels'].append(rle_encoding(pred_mask == label))

# Same processing for both splits, train first.
for split_name in ('train', 'test'):
  _append_split_predictions(split_name)

# The context manager closes the file even if writing fails.
with open("{}/pred.csv".format(BASE_DIR), 'w') as pred_file:
  pd.DataFrame(preddic).to_csv(pred_file, index=False)

## Part 4: Mask R-CNN

In [None]:
# Configuration of the Mask R-CNN (R50-FPN) instance-segmentation model.
# NOTE(review): this rebinds the global `cfg` and reuses the same OUTPUT_DIR as the
# detector above, so files written there by the earlier training may be overwritten — confirm.
cfg = get_cfg()
cfg.OUTPUT_DIR = "{}/output/".format(BASE_DIR)
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
# Initialise from the model-zoo checkpoint that belongs to the same config file.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.DATASETS.TRAIN = ("data_detection_train",)
cfg.DATASETS.TEST = ("data_detection_test",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.002
cfg.SOLVER.MAX_ITER = 500
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
# Single foreground class ("planes").
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

In [None]:
# Train Mask R-CNN, then rebuild the global `predictor` from the weights file expected in cfg.OUTPUT_DIR.
trainer = DefaultTrainer(cfg) 
# resume=False: do not continue from a previous checkpoint in cfg.OUTPUT_DIR.
trainer.resume_or_load(resume=False)
trainer.train()
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
# Test-time score threshold; set after `trainer` was created, before `predictor` is built.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
predictor = DefaultPredictor(cfg)

In [None]:
# Qualitative check: draw the Mask R-CNN predictions on a few random validation images.
dataset_dicts = get_detection_data("val")
# random.sample raises ValueError when asked for more items than the list
# holds, so cap the sample size at the number of available records.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
  im = cv2.imread(d["file_name"])
  outputs = predictor(im)
  v = Visualizer(im[:, :, ::-1], metadata=data_detection_metadata, scale=0.2, instance_mode=ColorMode.IMAGE_BW)
  out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
  cv2_imshow(out.get_image()[:, :, ::-1])

In [None]:
# COCO-style evaluation of Mask R-CNN on the validation split.
# NOTE(review): this evaluates `trainer.model`, whereas the detector evaluation earlier in the
# notebook used `predictor.model`; `trainer` was created before SCORE_THRESH_TEST was set to 0.7,
# so the two evaluations presumably run with different score thresholds — confirm which is intended.
evaluator = COCOEvaluator("data_detection_val", cfg, False, output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, "data_detection_val")
print(inference_on_dataset(trainer.model, val_loader, evaluator))