In [1]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Jun__8_16:49:14_PDT_2022
Cuda compilation tools, release 11.7, V11.7.99
Build cuda_11.7.r11.7/compiler.31442593_0
torch:  1.11 ; cuda:  cu113
detectron2: 0.6


In [9]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from PIL import Image
from IPython.display import display
from matplotlib import pyplot as plt

# from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.evaluation import COCOEvaluator, DatasetEvaluators
from detectron2.engine import DefaultTrainer

# Fisheye Dataset

In [10]:
register_coco_instances("fisheye8k_train", {}, 
                        "/workspace/FishEye8k/dataset/Fisheye8K_all_including_train/train/train.json", 
                        "/workspace/FishEye8k/dataset/Fisheye8K_all_including_train/train/images")
register_coco_instances("fisheye8k_val", {}, 
                        "/workspace/FishEye8k/dataset/Fisheye8K_all_including_train/test/test.json", 
                        "/workspace/FishEye8k/dataset/Fisheye8K_all_including_train/test/images")

AssertionError: Dataset 'fisheye8k_train' is already registered!

In [None]:
class Trainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        return DatasetEvaluators([COCOEvaluator("fisheye8k_val", output_dir="./output")])

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("fisheye8k_train",)
cfg.DATASETS.TEST = ("fisheye8k_val",)
cfg.TEST.EVAL_PERIOD = 100
cfg.DATALOADER.NUM_WORKERS = 4
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 32  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00025   # pick a good LR
cfg.SOLVER.MAX_ITER = 300    # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []          # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512   # The "RoIHead batch size". 128 is faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 5  # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()

[32m[04/01 18:13:48 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (6, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (6,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (20, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (20,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
[34mroi_heads.box_predictor.bbox_pred.{bias, weight}[0m
[34mroi_heads.box_predictor.cls

[32m[04/01 18:13:49 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[04/01 18:14:35 d2.utils.events]: [0m eta: 0:09:09  iter: 19  total_loss: 3.698  loss_cls: 1.943  loss_box_reg: 0.3966  loss_rpn_cls: 1.021  loss_rpn_loc: 0.2996    time: 1.9180  last_time: 1.7007  data_time: 0.9120  last_data_time: 0.5298   lr: 1.6068e-05  max_mem: 45332M
[32m[04/01 18:15:15 d2.utils.events]: [0m eta: 0:09:04  iter: 39  total_loss: 2.474  loss_cls: 1.617  loss_box_reg: 0.4158  loss_rpn_cls: 0.1503  loss_rpn_loc: 0.2575    time: 1.9783  last_time: 2.3698  data_time: 0.6254  last_data_time: 0.7745   lr: 3.2718e-05  max_mem: 45332M
[32m[04/01 18:15:56 d2.utils.events]: [0m eta: 0:08:39  iter: 59  total_loss: 1.905  loss_cls: 1.011  loss_box_reg: 0.4574  loss_rpn_cls: 0.1081  loss_rpn_loc: 0.2573    time: 1.9921  last_time: 2.3148  data_time: 0.6562  last_data_time: 0.7267   lr: 4.9367e-05  max_mem: 45332M
[32m[04/01 18:16:36 d2.utils.events]: [0m eta: 0:07:56  iter: 79  total_l

In [None]:
%load_ext tensorboard
%tensorboard --logdir output

## Inference

In [None]:
# Inference should use the config with parameters that are used in training
# cfg now already contains everything we've set previously. We changed it a little bit for inference:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

In [None]:
from detectron2.utils.visualizer import ColorMode
fisheye_metadata = MetadataCatalog.get("fisheye8k_train")

with open("/workspace/FishEye8k/dataset/Fisheye8K_all_including_train/train/train.json") as f:
    imgs_anns = json.load(f)
imgs_anns

In [None]:
dataset_dicts = imgs_anns["images"]
for d in random.sample(dataset_dicts, 3):
    f = d["file_name"]
    im = cv2.imread(f"/workspace/FishEye8k/dataset/Fisheye8K_all_including_train/train/images/{f}")
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    v = Visualizer(im[:, :, ::-1],
                   metadata=fisheye_metadata, 
                   scale=0.5)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.figure(figsize=(16,9))
    plt.imshow(out.get_image()[:, :, ::-1])
    plt.axis("off")
    plt.show()

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
evaluator = COCOEvaluator("fisheye8k_val", output_dir="./output")
val_loader = build_detection_test_loader(cfg, "fisheye8k_val")
print(inference_on_dataset(predictor.model, val_loader, evaluator))
# another equivalent way to evaluate the model is to use `trainer.test`