In [None]:
# Mount Google Drive at /content/drive; the dataset paths used later in this
# notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install dependencies and report the runtime environment.
# pyyaml is pinned to 5.1.
!pip install pyyaml==5.1
import torch, torchvision
# Print the installed torch version and whether a CUDA device is usable.
print(torch.__version__, torch.cuda.is_available())
# Print the system compiler version.
!gcc --version
# opencv is pre-installed on colab

In [None]:
# Install detectron2 from the prebuilt wheel index for CUDA 10.1 + torch 1.8
# (the versions Colab provided when this notebook was written).
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
import torch
# The wheel index below is specific to torch 1.8, so stop here on any other version.
assert torch.__version__.startswith("1.8")   # need to manually install torch 1.8 if Colab changes its default version
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html
# exit(0)  # After installation, you need to "restart runtime" in Colab. This line can also restart runtime

In [None]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

| #Values | Names      | Description                                                                                                                |
|---------|------------|----------------------------------------------------------------------------------------------------------------------------|
| 1       | type       | Describes the type of object: 'Car', 'Van', 'Truck','Pedestrian', 'Person_sitting', 'Cyclist', 'Tram','Misc' or 'DontCare' |
| 1       | truncated  | Float from 0 (non-truncated) to 1 (truncated), where truncated refers to the object leaving image boundaries              |
| 1       | occluded   | 0 = fully visible, 1 = partly occluded, 2 = largely occluded, 3 = unknown                                                  |
| 1       | alpha      | Observation angle                                                                                                          |
| 4       | bbox       | left, top, right, bottom pixel coordinates                                                                                 |
| 3       | dimensions | 3D object dimensions: height, width, length (in meters)                                                                    |
| 3       | location   | 3D object location x,y,z in camera coordinates (in meters)                                                                 |
| 1       | rotation_y | Rotation ry around Y-axis in camera coordinates [-pi..pi]                                                                  |
| 1       | score      | Only for results: Float, indicating confidence in detection, needed for p/r curves, higher is better.                      |

In [None]:
Example label line (ground truth, so the trailing `score` field is absent):

Car 0.60 3 -2.42 0.00 185.93 214.05 348.86 1.56 1.57 4.37 -6.96 1.73 7.83 -3.13

# Register KITTI dataset

In [None]:
# if your dataset is in COCO format, this cell can be replaced by the following three lines:
# from detectron2.data.datasets import register_coco_instances
# register_coco_instances("my_dataset_train", {}, "json_annotation_train.json", "path/to/image/dir")
# register_coco_instances("my_dataset_val", {}, "json_annotation_val.json", "path/to/image/dir")

import os
import pandas as pd
from detectron2.structures import BoxMode

def get_kitti_dicts(kitti_dir, indices=range(500)):
    """Build detectron2-style dataset dicts for a subset of KITTI training frames.

    For every index the image is read from
    ``<kitti_dir>/data_object_image_2/training/image_2/<idx:06d>.png`` and the
    labels from ``<kitti_dir>/training/label_2/<idx:06d>.txt`` (space-separated,
    one object per line).

    Args:
        kitti_dir: Root directory containing the image and label trees above.
        indices: Iterable of integer frame indices to load.

    Returns:
        A list with one dict per frame holding ``file_name``, ``image_id``,
        ``height``, ``width`` and ``annotations``. Each annotation has an
        absolute XYXY ``bbox``, its ``bbox_mode`` and a ``category_id`` equal to
        the position of the label's type in the ``classes`` tuple below.

    Raises:
        FileNotFoundError: If an image cannot be read.
        ValueError: If a label line uses a type that is not in ``classes``.
    """
    classes = ('Car', 'Van', 'Truck','Pedestrian', 'Person_sitting', 'Cyclist', 'Tram','Misc', 'DontCare')
    # Column names of a label line; 'score' is missing in ground-truth files and
    # is then filled with NaN by pandas.
    cols = ('type', 'truncated', 'occluded', 'alpha', 'left', 'top', 'right',
            'bottom', 'height', 'width', 'length', 'x', 'y', 'z', 'rot_y', 'score')

    dataset_dicts = []
    for idx in indices:
        stem = str(idx).zfill(6)
        img_path = os.path.join(kitti_dir, 'data_object_image_2', 'training', 'image_2', stem + '.png')
        labels_path = os.path.join(kitti_dir, 'training', 'label_2', stem + '.txt')

        # cv2.imread signals failure by returning None instead of raising, so
        # check explicitly to report the offending path.
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        height, width = image.shape[:2]

        labels = pd.read_csv(labels_path, delimiter=' ', names=cols)

        # TODO: add filters on truncated, occluded and maybe size?
        annotations = [
            {
                "bbox": [float(row.left), float(row.top), float(row.right), float(row.bottom)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "category_id": classes.index(row.type),
            }
            for row in labels.itertuples(index=False)
        ]

        dataset_dicts.append({
            "file_name": img_path,
            "image_id": idx,
            "height": height,
            "width": width,
            "annotations": annotations,
        })

    return dataset_dicts

# Register the KITTI train/test splits with detectron2 and attach class names.
# Both the dataset and its metadata must be keyed by the SAME name; otherwise
# MetadataCatalog.get(<registered name>) has no thing_classes.
ds_name = 'big-kitti'
KITTI_ROOT = '/content/drive/MyDrive/Datasets/KITTI'
KITTI_CLASSES = ('Car', 'Van', 'Truck','Pedestrian', 'Person_sitting', 'Cyclist', 'Tram','Misc', 'DontCare')

for split_name, split_indices in (
    ("small-kitti-train-big", range(3500)),
    ("small-kitti-test-big", range(3500, 4000)),
):
    # Drop a previous registration so this cell can be re-run without error.
    if split_name in DatasetCatalog.list():
        DatasetCatalog.remove(split_name)
    # Bind the indices as a default argument so each lambda keeps its own split.
    DatasetCatalog.register(split_name, lambda idxs=split_indices: get_kitti_dicts(KITTI_ROOT, idxs))
    MetadataCatalog.get(split_name).set(thing_classes=KITTI_CLASSES)

kitti_metadata = MetadataCatalog.get("small-kitti-train-big")

In [None]:
# Sanity check: draw the ground-truth annotations of three randomly chosen frames.
kitti_metadata = MetadataCatalog.get("small-kitti-train-big")
dataset_dicts = get_kitti_dicts('/content/drive/MyDrive/Datasets/KITTI')
for sample in random.sample(dataset_dicts, 3):
    bgr_frame = cv2.imread(sample["file_name"])
    # The channel order is reversed before drawing and reversed back for display.
    drawn = Visualizer(
        bgr_frame[:, :, ::-1],
        metadata=kitti_metadata,
        scale=0.5,
    ).draw_dataset_dict(sample)
    cv2_imshow(drawn.get_image()[:, :, ::-1])

In [None]:
# Train: start from a model-zoo Faster R-CNN (R50-FPN, 3x schedule) config and
# checkpoint, then train on the "small-kitti-train-big" dataset.
from detectron2.engine import DefaultTrainer
# Class names; their count sets the number of ROI-head classes below.
classes = ('Car', 'Van', 'Truck','Pedestrian', 'Person_sitting', 'Cyclist', 'Tram','Misc', 'DontCare')

mzoo_config_file = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(mzoo_config_file))
cfg.DATASETS.TRAIN = ("small-kitti-train-big",)
cfg.DATASETS.TEST = ()  # no test dataset is configured for the training run
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(mzoo_config_file)  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 5
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 300    # short run; NOTE(review): a practical model will likely need more iterations
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # smaller than the default of 512, chosen for speed
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(classes)  

# Make sure the output directory exists before the trainer is created.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume=False)  # resume=False: do not resume from an earlier run
trainer.train()

In [None]:
# Inference should use the config with parameters that are used in training.
# cfg already contains everything set previously; only the two values below are changed for inference.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set a custom testing threshold
# Build the predictor from the updated config.
predictor = DefaultPredictor(cfg)

In [None]:
# Randomly visualize results: run the predictor on one sampled frame and draw its detections.
from detectron2.utils.visualizer import ColorMode
dataset_dicts = get_kitti_dicts('/content/drive/MyDrive/Datasets/KITTI')
for record in random.sample(dataset_dicts, 1):
    frame = cv2.imread(record["file_name"])
    # Output format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    outputs = predictor(frame)
    painter = Visualizer(
        frame[:, :, ::-1],
        metadata=kitti_metadata,
        scale=0.5,
        # Remove the colors of unsegmented pixels. This option is only available for segmentation models.
        instance_mode=ColorMode.IMAGE_BW,
    )
    instances_cpu = outputs["instances"].to("cpu")
    rendered = painter.draw_instance_predictions(instances_cpu)
    cv2_imshow(rendered.get_image()[:, :, ::-1])

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Evaluate bounding-box detection on the held-out split. The name must match
# the one passed to DatasetCatalog.register ("small-kitti-test-big"); a name
# that was never registered cannot be looked up by the evaluator or the loader.
EVAL_DATASET = "small-kitti-test-big"
evaluator = COCOEvaluator(EVAL_DATASET, ("bbox",), False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, EVAL_DATASET)
print(inference_on_dataset(trainer.model, val_loader, evaluator))
# another equivalent way to evaluate the model is to use `trainer.test`