In [1]:
# Test detectron2 Mask R-CNN: run a COCO-pretrained model, then fine-tune it on the balloon dataset

import detectron2 

#Setup logger
from detectron2.utils.logger import setup_logger
setup_logger()

#Some common libraries 
import numpy as np 
import os,json,cv2,random 


#Detectron2 utilities 

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg 
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

## Run a pre-trained model 

In [4]:
# Load the test image and preview it in an OpenCV window.
image_path = r"/Users/HeleneSemb/Documents/Master/Kode /images/input.png"
im = cv2.imread(image_path)

# cv2.imread does not raise on a missing/unreadable file; it returns None.
# Fail here with a clear message instead of a confusing error further down.
if im is None:
    raise FileNotFoundError(f"Could not read image: {image_path!r}")

cv2.imshow("image", im)
cv2.waitKey(0)  # block until a key is pressed in the image window

# closing all open windows
cv2.destroyAllWindows()

In [6]:
# Build an inference config from the model-zoo Mask R-CNN recipe and run it on `im`.
cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
)

# Run on the CPU.
cfg.MODEL.DEVICE = "cpu"

# Score threshold applied to detections at test time.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

# Use the checkpoint that the model zoo publishes for the same recipe.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
)

predictor = DefaultPredictor(cfg)
outputs = predictor(im)

[32m[01/23 10:17:43 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl ...


model_final_f10217.pkl: 178MB [00:15, 11.7MB/s]                              
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [7]:
# Draw the predicted instances on the image and preview them in an OpenCV window.
# The channel axis is reversed on the way in and again on the way out
# (OpenCV images are BGR, the Visualizer works on RGB).
v = Visualizer(
    im[:, :, ::-1],
    metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
    scale=1.2,
)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

cv2.imshow("imageout", out.get_image()[:, :, ::-1])
cv2.waitKey(0)  # block until a key is pressed in the image window

# closing all open windows
cv2.destroyAllWindows()



## Custom dataset: Balloons 

In [2]:
from detectron2.structures import BoxMode 

def get_balloon_dicts(img_dir):
    """Build a list of dataset records from the VIA annotations in ``img_dir``.

    Reads ``via_region_data.json`` from ``img_dir`` and produces one record per
    annotated image, with one polygon annotation per region.

    Args:
        img_dir: Directory containing the images and ``via_region_data.json``.

    Returns:
        list[dict]: One record per image with the keys ``file_name``,
        ``image_id`` (position of the image in the JSON), ``height``,
        ``width`` and ``annotations``. Every annotation holds ``bbox``
        (min/max of the polygon vertices), ``bbox_mode``
        (``BoxMode.XYXY_ABS``), ``segmentation`` (a list with one flattened
        ``[x0, y0, x1, y1, ...]`` polygon) and ``category_id`` (always 0).

    Raises:
        FileNotFoundError: If an image listed in the JSON cannot be read.
        AssertionError: If a region has non-empty ``region_attributes``.
    """
    json_file = os.path.join(img_dir, "via_region_data.json")
    with open(json_file) as f:
        imgs_anns = json.load(f)

    dataset_dicts = []
    for idx, v in enumerate(imgs_anns.values()):
        filename = os.path.join(img_dir, v["filename"])

        # cv2.imread returns None (no exception) when the file is missing or
        # unreadable; report that explicitly instead of failing on `.shape`.
        image = cv2.imread(filename)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {filename!r}")
        height, width = image.shape[:2]

        # "regions" may be a dict keyed by region index or a plain list of
        # regions; iterate over the region objects in either case.
        regions = v["regions"]
        if isinstance(regions, dict):
            regions = regions.values()

        objs = []
        for region in regions:
            assert not region["region_attributes"]
            shape = region["shape_attributes"]
            px = shape["all_points_x"]
            py = shape["all_points_y"]

            # Shift every vertex by half a pixel and flatten the (x, y) pairs
            # into a single [x0, y0, x1, y1, ...] list.
            poly = [c for x, y in zip(px, py) for c in (x + 0.5, y + 0.5)]

            objs.append(
                {
                    "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                    "bbox_mode": BoxMode.XYXY_ABS,
                    "segmentation": [poly],
                    "category_id": 0,
                }
            )

        dataset_dicts.append(
            {
                "file_name": filename,
                "image_id": idx,
                "height": height,
                "width": width,
                "annotations": objs,
            }
        )
    return dataset_dicts

# Register the balloon train/val splits and attach their class names.
for d in ["train", "val"]:
    dataset_name = "balloon_" + d
    # DatasetCatalog.register refuses to register a name twice, so drop any
    # earlier registration first; this keeps the cell re-runnable.
    if dataset_name in DatasetCatalog.list():
        DatasetCatalog.remove(dataset_name)
    # `d=d` binds the current split name into the lambda at definition time.
    DatasetCatalog.register(dataset_name, lambda d=d: get_balloon_dicts(r"/Users/HeleneSemb/Documents/Master/Kode/balloon/" + d))
    MetadataCatalog.get(dataset_name).set(thing_classes=["balloon"])

# Must match the registered name exactly: MetadataCatalog.get() on an unknown
# name returns a fresh, empty metadata object instead of raising.
balloon_metadata = MetadataCatalog.get("balloon_train")
        

In [3]:
# Show one randomly chosen training image with its ground-truth annotations.
# The records come from the registered "balloon_train" dataset, so this cell
# reads the same directory that training uses and no path is spelled out twice.
dataset_dicts = DatasetCatalog.get("balloon_train")
for d in random.sample(dataset_dicts, 1):
    img = cv2.imread(d["file_name"])
    # Reverse the channel axis going in and coming out
    # (OpenCV images are BGR, the Visualizer works on RGB).
    visualizer = Visualizer(
        img[:, :, ::-1],
        metadata=MetadataCatalog.get("balloon_train"),
        scale=0.5,
    )
    out = visualizer.draw_dataset_dict(d)
    cv2.imshow("imageout", out.get_image()[:, :, ::-1])
    cv2.waitKey(0)  # block until a key is pressed in the image window
    # closing all open windows
    cv2.destroyAllWindows()

In [5]:
from detectron2.engine import DefaultTrainer

# Fine-tune the model-zoo Mask R-CNN checkpoint on the balloon training split.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))

# DATASETS.TRAIN is a tuple of dataset names. The trailing comma is required:
# ("balloon_train") is just a parenthesised string, and indexing it
# (e.g. cfg.DATASETS.TRAIN[0]) would yield a single character.
cfg.DATASETS.TRAIN = ("balloon_train",)
cfg.DATASETS.TEST = ()  # no evaluation during training

cfg.MODEL.DEVICE = "cpu"
cfg.DATALOADER.NUM_WORKERS = 2

# Start from the checkpoint published for the same model-zoo recipe.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")

cfg.SOLVER.IMS_PER_BATCH = 1   # images per training iteration
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 300
cfg.SOLVER.STEPS = []          # no learning-rate decay steps

cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # the dataset has a single class ("balloon")

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()


[32m[01/23 11:44:19 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in

[32m[01/23 11:44:24 d2.engine.train_loop]: [0mStarting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[01/23 11:51:06 d2.utils.events]: [0m eta: 1:31:32  iter: 19  total_loss: 2.096  loss_cls: 0.7468  loss_box_reg: 0.5747  loss_mask: 0.684  loss_rpn_cls: 0.01686  loss_rpn_loc: 0.006547  time: 19.4967  data_time: 0.6553  lr: 1.6068e-05  
[32m[01/23 11:57:25 d2.utils.events]: [0m eta: 1:26:46  iter: 39  total_loss: 1.949  loss_cls: 0.6027  loss_box_reg: 0.7474  loss_mask: 0.5953  loss_rpn_cls: 0.01133  loss_rpn_loc: 0.004719  time: 19.1845  data_time: 0.0072  lr: 3.2718e-05  
[32m[01/23 12:04:44 d2.utils.events]: [0m eta: 1:20:15  iter: 59  total_loss: 1.785  loss_cls: 0.4711  loss_box_reg: 0.7579  loss_mask: 0.4797  loss_rpn_cls: 0.01736  loss_rpn_loc: 0.004164  time: 20.1472  data_time: 0.0077  lr: 4.9367e-05  
[32m[01/23 12:11:22 d2.utils.events]: [0m eta: 1:13:18  iter: 79  total_loss: 1.322  loss_cls: 0.3402  loss_box_reg: 0.6669  loss_mask: 0.3171  loss_rpn_cls: 0.006203  loss_rpn_loc: 0.002194  time: 20.0796  data_time: 0.0076  lr: 6.6017e-05  
[32m[01/23 12:18:31 d2.

In [7]:
# %reload_ext loads the extension if it is not loaded yet and reloads it otherwise,
# so re-running this cell no longer prints the "already loaded" notice.
%reload_ext tensorboard
%tensorboard --logdir output

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 43302), started 0:05:08 ago. (Use '!kill 43302' to kill it.)

### Inference 

In [16]:

# Load the final checkpoint from the directory the training cell created
# (cfg.OUTPUT_DIR) instead of repeating a machine-specific absolute path.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
# Score threshold applied to detections at test time.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
predictor = DefaultPredictor(cfg)

[32m[01/23 14:31:51 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from /Users/HeleneSemb/Documents/Master/Kode/output/model_final.pth ...


In [18]:
from detectron2.utils.visualizer import ColorMode 
# Run the current `predictor` on one randomly chosen validation image and
# preview the predicted instances in an OpenCV window.
dataset_dicts = get_balloon_dicts(r"/Users/HeleneSemb/Documents/Master/Kode/balloon/val")
for d in random.sample(dataset_dicts, 1):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)

    # Reverse the channel axis going in and coming out
    # (OpenCV images are BGR, the Visualizer works on RGB).
    v = Visualizer(
        im[:, :, ::-1],
        instance_mode=ColorMode.IMAGE_BW,
        scale=0.5,
        metadata=balloon_metadata,
    )
    instances_cpu = outputs["instances"].to("cpu")
    out = v.draw_instance_predictions(instances_cpu)

    cv2.imshow("image", out.get_image()[:, :, ::-1])
    cv2.waitKey(0)  # block until a key is pressed in the image window
    # closing all open windows
    cv2.destroyAllWindows()
    

In [19]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset 
from detectron2.data import build_detection_test_loader 
# Evaluate the predictor's model on the registered "balloon_val" dataset and print the metrics.
evaluator = COCOEvaluator(
    "balloon_val",
    output_dir="/Users/HeleneSemb/Documents/Master/Kode/output",
)
val_loader = build_detection_test_loader(cfg, "balloon_val")
eval_results = inference_on_dataset(predictor.model, val_loader, evaluator)
print(eval_results)


[32m[01/23 14:42:37 d2.evaluation.coco_evaluation]: [0mTrying to convert 'balloon_val' to COCO format ...
[32m[01/23 14:42:37 d2.data.datasets.coco]: [0mConverting annotations of dataset 'balloon_val' to COCO format ...)
[32m[01/23 14:42:38 d2.data.datasets.coco]: [0mConverting dataset dicts into COCO format
[32m[01/23 14:42:39 d2.data.datasets.coco]: [0mConversion finished, #images: 13, #annotations: 50
[32m[01/23 14:42:39 d2.data.datasets.coco]: [0mCaching COCO format annotations at '/Users/HeleneSemb/Documents/Master/Kode/output/balloon_val_coco_format.json' ...
[32m[01/23 14:42:39 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|  balloon   | 50           |
|            |              |[0m
[32m[01/23 14:42:39 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[01/2