# Install detectron2

In [1]:
# Install a pinned PyYAML release before installing detectron2.
!pip install pyyaml==5.1

import torch
# Keep only the first two dot-separated components of the torch version (e.g. "1.10").
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
# Text after the last "+" in the torch version string (the CUDA build tag, e.g. "cu111").
# NOTE(review): if the version string has no "+", this is the whole version string — confirm
# the resulting wheel URL is valid for such builds.
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
# Install detectron2 that matches the above pytorch version
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
# $CUDA_VERSION and $TORCH_VERSION are substituted from the Python variables defined above.
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/$CUDA_VERSION/torch$TORCH_VERSION/index.html
# If there is not yet a detectron2 release that matches the given torch + CUDA version, you need to install a different pytorch.

# Terminates the current Python process so the freshly installed packages are picked up on restart.
exit(0)  # After installation, you may need to "restart runtime" in Colab. This line can also restart runtime

Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[?25l[K     |█▏                              | 10 kB 16.4 MB/s eta 0:00:01[K     |██▍                             | 20 kB 19.6 MB/s eta 0:00:01[K     |███▋                            | 30 kB 23.0 MB/s eta 0:00:01[K     |████▉                           | 40 kB 24.5 MB/s eta 0:00:01[K     |██████                          | 51 kB 18.6 MB/s eta 0:00:01[K     |███████▏                        | 61 kB 12.6 MB/s eta 0:00:01[K     |████████▍                       | 71 kB 12.0 MB/s eta 0:00:01[K     |█████████▋                      | 81 kB 13.1 MB/s eta 0:00:01[K     |██████████▊                     | 92 kB 12.6 MB/s eta 0:00:01[K     |████████████                    | 102 kB 13.4 MB/s eta 0:00:01[K     |█████████████▏                  | 112 kB 13.4 MB/s eta 0:00:01[K     |██████████████▍                 | 122 kB 13.4 MB/s eta 0:00:01[K     |███████████████▌                | 133 kB 13.4 MB/s eta 0:00:01[

In [1]:
# Mount Google Drive at /content/drive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Switch the working directory to the homework folder on the mounted Drive;
# relative paths such as "dataset/train/train.json" are resolved against it.
%cd /content/drive/MyDrive/Colab Notebooks/visual recognition/HW3

/content/drive/MyDrive/Colab Notebooks/visual recognition/HW3


# Use GPU

In [3]:
# Report whether PyTorch can see a CUDA device in this runtime (the value is shown as the cell output).
import torch
torch.cuda.is_available()

True

In [4]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, random, cv2
from PIL import Image
from google.colab.patches import cv2_imshow
import pycocotools.mask as mask_util

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import BoxMode

## Register Dataset


In [None]:
#register custom datasets to Detectron2
from detectron2.data import DatasetCatalog
from detectron2.data.datasets import register_coco_instances

# Registering a name that already exists fails, so this cell used to break when it
# was run a second time in the same kernel. Guarding on the catalog keeps it re-runnable.
if "train_data" not in DatasetCatalog.list():
  # Arguments: dataset name, extra metadata (none), COCO annotation json, image directory.
  register_coco_instances("train_data", {}, "dataset/train/train.json", "dataset/train/images")

In [None]:
# get the information of dataset
# Metadata object associated with the "train_data" name (class names, file locations, ...).
coco_metadata = MetadataCatalog.get("train_data")
# Fetching the dataset from the catalog loads the annotation json.
dataset_dicts = DatasetCatalog.get("train_data")
# Bare last expression: display the metadata as the cell output.
coco_metadata

[32m[12/16 11:59:52 d2.data.datasets.coco]: [0mLoading dataset/train/train.json takes 1.43 seconds.
[32m[12/16 11:59:52 d2.data.datasets.coco]: [0mLoaded 24 images in COCO format from dataset/train/train.json


Metadata(evaluator_type='coco', image_root='dataset/train/images', json_file='dataset/train/train.json', name='train_data', thing_classes=['nucleus'], thing_dataset_id_to_contiguous_id={1: 0})

# Configure the model and training settings

In [9]:
# load basic setting
from detectron2.engine import DefaultTrainer

# Start from detectron2's defaults, then overlay the model-zoo Mask R-CNN config file.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml")) #ResNeXt 101
cfg.DATASETS.TRAIN = ("train_data",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 3000    # train for 3000 iterations; adjust to the size of the dataset
#cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # smaller than the default of 512, which makes training faster
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # the dataset has a single class ('nucleus'). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrectly use num_classes+1 here.

# Predict

In [5]:
def instances_to_coco_json(instances, img_id, results=None):
  """
  Convert the predictions for one image into COCO-format result dicts.

  Args:
      instances (Instances): predictions for a single image. They are expected
          to live on the CPU, because the box tensor is converted with ``.numpy()``.
      img_id (int): the image id written into every result's "image_id".
      results (list[dict], optional): accumulator that the new entries are
          appended to in place. A new list is created when it is omitted.

  Returns:
      list[dict]: ``results`` itself, with one entry appended per instance.
      When ``instances`` is empty the accumulator is returned unchanged.
  """
  if results is None:
    results = []

  num_instance = len(instances)
  if num_instance == 0:
    # Return the accumulator (not a fresh list) so the return value means the
    # same thing whether or not this image produced detections.
    return results

  # Boxes are predicted as (x1, y1, x2, y2); COCO results use (x, y, width, height).
  boxes = instances.pred_boxes.tensor.numpy()
  boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
  boxes = boxes.tolist()
  scores = instances.scores.tolist()

  has_mask = instances.has("pred_masks")
  if has_mask:
    # use RLE to encode the masks, because they are too large and take memory
    # when the outputs of the entire dataset are kept in one list
    rles = [
      mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
      for mask in instances.pred_masks
    ]
    for rle in rles:
      # "counts" is an array encoded by mask_util as a byte-stream. Python3's
      # json writer which always produces strings cannot serialize a bytestream
      # unless you decode it. Thankfully, utf-8 works out (which is also what
      # the pycocotools/_mask.pyx does).
      rle["counts"] = rle["counts"].decode("utf-8")

  has_keypoints = instances.has("pred_keypoints")
  if has_keypoints:
    keypoints = instances.pred_keypoints

  for k in range(num_instance):
    result = {
      "image_id": img_id,
      # Every detection is reported with category id 1 (hard-coded);
      # the predicted class index is not consulted.
      "category_id": 1,
      "bbox": boxes[k],
      "score": scores[k],
    }
    if has_mask:
      result["segmentation"] = rles[k]
    if has_keypoints:
      # In COCO annotations,
      # keypoints coordinates are pixel indices.
      # However our predictions are floating point coordinates.
      # Therefore we subtract 0.5 to be consistent with the annotation format.
      # This is the inverse of data loading logic in `datasets/coco.py`.
      # Note: the subtraction modifies `instances.pred_keypoints` in place.
      keypoints[k][:, :2] -= 0.5
      result["keypoints"] = keypoints[k].flatten().tolist()
    results.append(result)
  return results

In [6]:
class MyEncoder(json.JSONEncoder):
  """JSON encoder that additionally serializes NumPy arrays, NumPy scalars and bytes.

  Arrays become nested lists, NumPy scalars become the matching Python scalar,
  and bytes are decoded as UTF-8 text. Anything else is delegated to the base
  class, which raises ``TypeError`` for unsupported objects.
  """

  def default(self, obj):
    if isinstance(obj, np.ndarray):
      # The original referenced an undefined name here (`obt`), which raised
      # NameError as soon as an array had to be serialized.
      return obj.tolist()
    if isinstance(obj, np.generic):
      # NumPy scalar types such as np.int64 / np.float32.
      return obj.item()
    if isinstance(obj, bytes):
      return str(obj, encoding='utf-8')
    return super().default(obj)

In [7]:
# save result to json
def save_to_json(result_to_json):
  """Write the collected results to ``<cfg.OUTPUT_DIR>/annotations/answer.json``.

  Args:
      result_to_json: JSON-serializable object (here, the list of COCO-format
          result dicts). Values the standard encoder rejects are handled by
          ``MyEncoder``.

  Reads the module-level ``cfg`` for the output directory and prints the path
  of the written file.
  """
  folder = os.path.join(cfg.OUTPUT_DIR, 'annotations')
  # exist_ok avoids the check-then-create race of a separate os.path.exists test.
  os.makedirs(folder, exist_ok=True)

  json_name = '{}/annotations/{}'.format(cfg.OUTPUT_DIR, 'answer.json')
  with open(json_name, 'w') as f:
    json.dump(result_to_json, f, cls = MyEncoder)
  print('Save annotation to {}'.format(json_name))

In [12]:
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer, ColorMode
import matplotlib.pyplot as plt
import cv2
import os

# Point the config at the weights written to the output directory and build the predictor.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.1   # keep detections scoring at least 0.1
cfg.TEST.DETECTIONS_PER_IMAGE = 2500          # upper bound on detections kept per image
predictor = DefaultPredictor(cfg)

test_dir = "dataset/test"
test_set = "dataset/test_img_ids.json"

# Each entry of the test list is read below through its "file_name" and "id" keys.
with open(test_set) as f:
    test_data = json.load(f)

result_to_json = []
json_name = os.path.join('{}/annotations/{}'.format(cfg.OUTPUT_DIR,'answer.json'))

for d in test_data:
  image_path = os.path.join(test_dir, d["file_name"])
  im = cv2.imread(image_path)
  if im is None:
    # cv2.imread signals a missing or unreadable file by returning None rather
    # than raising; fail here with the offending path instead of deep inside
    # the predictor.
    raise FileNotFoundError('Could not read test image: {}'.format(image_path))
  outputs = predictor(im)

  #save the answer result
  # Move predictions to the CPU before converting them; the entries are
  # appended to result_to_json in place.
  answer = outputs["instances"].to("cpu")
  result = instances_to_coco_json(answer, d["id"], result_to_json)

save_to_json(result_to_json)

  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Save annotation to ./output/annotations/answer.json
