# Download data


In [None]:
!git clone https://github.com/anminhhung/vehicle_data

# Setup Detectron2

In [None]:
!pip install cython pyyaml==5.1
!python -m pip install torch==1.5.0+cu101 torchvision==0.6.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html && \
python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.5/index.html

In [None]:
!git clone https://github.com/facebookresearch/detectron2

In [None]:
# %cd detectron2
# !python -m pip install -e .
# !python setup.py build develop
# %cd ..

# Import package

In [None]:
import os
import cv2
import json
import random
import itertools
import torch
import copy
import numpy as np

import detectron2
from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer,  DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader, build_detection_train_loader
from detectron2.structures import BoxMode
from detectron2.data import detection_utils
import detectron2.data.transforms as T

from google.colab.patches import cv2_imshow

# Convert yolo -> json (detectron format)

In [None]:
def get_data_dicts_mrcnn(img_dir):
    """Convert a directory of YOLO-labelled images into Detectron2 dataset dicts.

    Every entry of ``img_dir`` whose extension is not ``txt`` is treated as an
    image; its labels are read from the ``.txt`` file sharing its base name.
    Each label line holds ``class_id x_center y_center width height`` where the
    four box values are fractions of the image size. Boxes are converted to
    absolute XYXY pixel coordinates and a rectangular polygon is emitted as the
    segmentation of each object.

    Images that cannot be processed (unreadable image, missing or malformed
    label file) are skipped and their path is appended to ``fail_log.txt``.

    Args:
        img_dir: Directory containing the images and their label files.

    Returns:
        A list of dicts with the keys ``file_name``, ``image_id``, ``height``,
        ``width`` and ``annotations``.
    """
    image_files = [f for f in os.listdir(img_dir) if f.split(".")[-1] != 'txt']

    dataset_dicts = []
    fail_cnt = 0
    for image_id, filename in enumerate(image_files):
        img_path = os.path.join(img_dir, filename)
        try:
            image = cv2.imread(img_path)
            if image is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise ValueError("cannot decode image: {}".format(img_path))
            height, width = image.shape[:2]

            # splitext keeps dots inside the base name ("cam.01.jpg" -> "cam.01").
            label_name = os.path.splitext(filename)[0] + '.txt'
            objs = []
            with open(os.path.join(img_dir, label_name), 'r') as annotations:
                for line in annotations:
                    fields = line.split()
                    if not fields:
                        # A blank line must not discard the whole image.
                        continue
                    class_id, x_center, y_center, w, h = fields
                    w = int(float(w) * width)
                    h = int(float(h) * height)
                    xmin = int((float(x_center) * width) - w/2)
                    ymin = int((float(y_center) * height) - h/2)
                    xmax = xmin + w
                    ymax = ymin + h

                    # Box corners in order, flattened to [x0, y0, x1, y1, ...].
                    poly = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]

                    objs.append({
                        'bbox': [xmin, ymin, xmax, ymax],
                        'bbox_mode': BoxMode.XYXY_ABS,
                        'segmentation': [poly],
                        'category_id': int(class_id),
                        "iscrowd": 0
                    })
        except Exception:
            # Best effort: log the offending image and keep converting the rest.
            fail_cnt += 1
            with open("fail_log.txt", "a+") as f:
                f.write("path image failed: {}\n".format(img_path))
            continue

        dataset_dicts.append({
            "file_name": img_path,
            "image_id": image_id,
            "height": height,
            "width": width,
            "annotations": objs,
        })

    # print("fail_cnt: ", fail_cnt)
    return dataset_dicts

In [None]:
# Convert both splits from YOLO labels to Detectron2 dataset dicts.
train_dicts = get_data_dicts_mrcnn('vehicle_data/train')
val_dicts = get_data_dicts_mrcnn('vehicle_data/val')

# Write each split to its own JSON file so it can be reloaded later.
for json_path, split_dicts in (
    ('train_dicts_rcnn.json', train_dicts),
    ('val_dicts_rcnn.json', val_dicts),
):
    with open(json_path, 'w') as fp:
        json.dump(split_dicts, fp)

# Load json data, trainer

In [None]:
# Load the saved dataset dicts. Each variable receives its own split: the
# train file goes to train_dicts and the val file to val_dicts.
with open('train_dicts_rcnn.json', 'r') as fp:
    train_dicts = json.load(fp)

with open('val_dicts_rcnn.json', 'r') as fp:
    val_dicts = json.load(fp)

In [None]:
# JSON cannot store the BoxMode enum member, so set it again on every
# annotation of both splits after loading.
for split_dicts in (val_dicts, train_dicts):
    for record in split_dicts:
        for anno in record["annotations"]:
            anno['bbox_mode'] = BoxMode.XYXY_ABS

In [None]:
# Class names registered as thing_classes for both splits.
classes = ['0', '1', '2', '3', '4']
data = [train_dicts, val_dicts]

for index, split in enumerate(("train", "val")):
    dataset_name = "vehicle_data/" + split
    # index is bound as a default argument so each loader returns its own
    # split instead of whatever the loop variable ends up being.
    DatasetCatalog.register(dataset_name, lambda index=index: data[index])
    MetadataCatalog.get(dataset_name).set(thing_classes=classes)

vehicle_metadata = MetadataCatalog.get("vehicle_data/train")

In [None]:
# Without Augmentation
class CustomTrainer(DefaultTrainer):
    """DefaultTrainer that evaluates with a COCOEvaluator.

    The training data loader is inherited unchanged from DefaultTrainer.
    """

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator for ``dataset_name``.

        When ``output_folder`` is None, a default results directory is
        created (if needed) and used instead.
        """
        if output_folder is None:
            output_folder = "mask_rcnn_R_101_FPN_3x_new_train"  # name dir
            os.makedirs(output_folder, exist_ok=True)

        return COCOEvaluator(dataset_name, cfg, False, output_folder)

In [None]:
def custom_mapper(dataset_dict, size=(800, 800), flip_prob=0,
                  min_brightness=0.5, max_brightness=1.5,
                  min_contrast=0.5, max_contrast=1.5,
                  min_saturation=0.5, max_saturation=1.5):
    """Map one dataset dict to a training sample with augmentation applied.

    The image is read as BGR, resized to ``size``, and passed through random
    brightness, contrast and saturation changes followed by a vertical and a
    horizontal random flip (each with probability ``flip_prob``). The same
    transforms are applied to the non-crowd annotations.

    Args:
        dataset_dict: A dataset record with ``file_name`` and ``annotations``.
        size: Target size handed to ``T.Resize``.
        flip_prob: Probability used for both random flips.
        min_brightness, max_brightness: Range for ``T.RandomBrightness``.
        min_contrast, max_contrast: Range for ``T.RandomContrast``.
        min_saturation, max_saturation: Range for ``T.RandomSaturation``.

    Returns:
        A deep copy of the record without ``annotations`` and with ``image``
        (float32 tensor, channels first) and ``instances`` added.
    """
    # Work on a copy: the record is modified below.
    record = copy.deepcopy(dataset_dict)
    raw_image = detection_utils.read_image(record["file_name"], format="BGR")

    augmentations = [
        T.Resize(size),
        T.RandomBrightness(min_brightness, max_brightness),
        T.RandomContrast(min_contrast, max_contrast),
        T.RandomSaturation(min_saturation, max_saturation),
        T.RandomFlip(prob=flip_prob, horizontal=False, vertical=True),
        T.RandomFlip(prob=flip_prob, horizontal=True, vertical=False),
    ]
    image, transforms = T.apply_transform_gens(augmentations, raw_image)

    # HWC -> CHW float32 tensor.
    channels_first = image.transpose(2, 0, 1).astype("float32")
    record["image"] = torch.as_tensor(channels_first)

    image_hw = image.shape[:2]
    annos = []
    for obj in record.pop("annotations"):
        if obj.get("iscrowd", 0) != 0:
            continue
        annos.append(
            detection_utils.transform_instance_annotations(obj, transforms, image_hw)
        )

    instances = detection_utils.annotations_to_instances(annos, image_hw)
    record["instances"] = detection_utils.filter_empty_instances(instances)
    return record

# Training with augmentation
class AugmentTrainer(DefaultTrainer):
    """DefaultTrainer that feeds training through ``custom_mapper``.

    Evaluation uses a COCOEvaluator.
    """

    @classmethod
    def build_train_loader(cls, cfg):
        """Return the training loader that uses ``custom_mapper``."""
        return build_detection_train_loader(cfg, mapper=custom_mapper)

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator for ``dataset_name``.

        When ``output_folder`` is None, ``Evaluate_dir`` is created (if
        needed) and used instead.
        """
        if output_folder is None:
            output_folder = "Evaluate_dir"
            os.makedirs(output_folder, exist_ok=True)

        return COCOEvaluator(dataset_name, cfg, False, output_folder)

# Visualize data

In [None]:
import random
import matplotlib.pyplot as plt

# Draw the ground-truth annotations of up to 10 random validation images.
# The sample size is capped at the dataset size because random.sample raises
# ValueError when asked for more items than are available.
for d in random.sample(val_dicts, min(10, len(val_dicts))):
    img = cv2.imread(d["file_name"])
    # cv2 loads BGR; the channel flip hands the Visualizer an RGB image.
    v = Visualizer(img[:, :, ::-1], metadata=vehicle_metadata, scale=0.5)
    v = v.draw_dataset_dict(d)
    plt.figure(figsize=(14, 10))
    # get_image() is already RGB, which is what matplotlib displays.
    plt.imshow(v.get_image())
    plt.show()

# Train

In [None]:
cfg = get_cfg()
cfg.merge_from_file("./detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")
cfg.DATASETS.TRAIN = ("vehicle_data/train",)
cfg.DATASETS.TEST = ("vehicle_data/val",)   # evaluated through CustomTrainer.build_evaluator
cfg.DATALOADER.NUM_WORKERS = 4

#https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md
# The checkpoint must match the architecture of the config merged above
# (R_101), otherwise only part of the backbone is initialized from it.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")  # Let training initialize from model zoo
# cfg.MODEL.WEIGHTS = "mask_rcnn_R_101_FPN_3x_model_new_train/model_final.pth" # load saved model
# cfg.MODEL.WEIGHTS = "" # without transfer learning

cfg.SOLVER.IMS_PER_BATCH = 8
cfg.SOLVER.BASE_LR = 0.00001
cfg.SOLVER.MAX_ITER = 1000
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 32
# Take the class count from the registered dataset so the prediction heads
# match the labels (and the class count used at inference time).
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(MetadataCatalog.get("vehicle_data/train").thing_classes)
cfg.OUTPUT_DIR = "./mask_rcnn_R_101_FPN_3x_model_new_train"

cfg.TEST.EVAL_PERIOD = 500 # eval for each 500 iters

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = CustomTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

# Predict 

In [None]:
import os
import cv2
import json
import time
import random
import itertools
import numpy as np

from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, evaluator
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2.structures import BoxMode
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt

import json

def predict(path_weigths, path_config, confidence_threshold, num_of_class, path_img,
            class_names=None):
    """Run the detector on one image and return its predictions.

    Args:
        path_weigths: Path to the model checkpoint to load.
        path_config: Path to the Detectron2 config file of the model.
        confidence_threshold: Value assigned to ROI_HEADS.SCORE_THRESH_TEST.
        num_of_class: Value assigned to ROI_HEADS.NUM_CLASSES.
        path_img: Path of the image to run the predictor on.
        class_names: Optional sequence mapping a predicted class id to its
            name. Defaults to the notebook-level ``classes_raw`` list.

    Returns:
        A tuple ``(outputs, list_bbox, scores, list_classes)``: the raw
        predictor output, the boxes as lists of ints, the scores of the
        predicted instances, and the class name of each prediction.

    Raises:
        FileNotFoundError: If ``path_img`` cannot be read as an image.
    """
    if class_names is None:
        # Backward-compatible default: the class list defined by the notebook.
        class_names = classes_raw

    # Fail early and clearly instead of passing None to the predictor.
    im = cv2.imread(path_img)
    if im is None:
        raise FileNotFoundError("cannot read image: {}".format(path_img))

    cfg = get_cfg()
    cfg.merge_from_file(path_config)
    cfg.MODEL.WEIGHTS = path_weigths

    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = confidence_threshold
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 8
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_of_class
    predictor = DefaultPredictor(cfg)
    outputs = predictor(im)

    instances = outputs["instances"]
    scores = instances.scores

    # One [x1, y1, x2, y2] list of ints per predicted box.
    list_bbox = [[int(coord) for coord in bbox] for bbox in instances.pred_boxes]
    list_classes = [class_names[int(class_id)] for class_id in instances.pred_classes]

    return outputs, list_bbox, scores, list_classes

# Inputs: out = output of the predict function, frame = the original image,
# classs = class names used for the labels
def visualize(out, frame, classs, score_threshold=0.4):
    """Draw the predicted boxes and class labels on ``frame``.

    Args:
        out: Predictor output; ``out['instances']`` must provide
            ``pred_boxes``, ``scores`` and ``pred_classes``.
        frame: Image to draw on. It is modified in place.
        classs: Sequence of class names, indexed by predicted class id.
        score_threshold: Only detections scoring above this value are drawn.

    Returns:
        The same ``frame`` object with rectangles and labels drawn on it.
    """
    instances = out['instances']
    detections = zip(instances.pred_boxes, instances.scores, instances.pred_classes)
    for box, score, class_id in detections:
        if not score > score_threshold:
            continue

        start = (int(box[0]), int(box[1]))
        end = (int(box[2]), int(box[3]))
        label = int(class_id)
        print('box:', start, end,
              'width:', end[0] - start[0],
              'height:', end[1] - start[1],
              'class:', label,
              'score:', float(score))

        # One random colour per detection, shared by the box and its label.
        color = (random.randint(0, 255), random.randint(0, 255), 255)
        cv2.rectangle(frame, start, end, color, 1)
        cv2.putText(frame, str(classs[label]), start, cv2.FONT_HERSHEY_PLAIN, 1, color, 2)
    return frame


path_weigth = 'mask_rcnn_R_101_FPN_3x_model_new_train/model_final.pth'
# The config must describe the architecture the checkpoint was trained with;
# training merges the R_101 config, so inference has to use it as well.
path_config = './detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml'
confidences_threshold = 0.5
num_of_class = 5
path_img = 'vehicle_data/val/cam_01_0.jpg'
classes_raw = ['0', '1', '2', '3', '4']

outputs, boxes, scores, classes = predict(path_weigth, path_config, confidences_threshold, num_of_class, path_img)
print(outputs)
_frame = cv2.imread(path_img)
# visualize indexes its third argument by class id, so it needs the full
# class-name list rather than the per-detection labels returned by predict.
frame = visualize (outputs, _frame, classes_raw)
cv2.imwrite("frame.jpg", frame)
cv2_imshow(frame)
