# Detectron2 Beginner's Tutorial

<img src="https://dl.fbaipublicfiles.com/detectron2/Detectron2-Logo-Horz.png" width="500">

Welcome to detectron2! This is the official colab tutorial of detectron2. Here, we will go through some basic usage of detectron2, including the following:
* Run inference on images or videos, with an existing detectron2 model
* Train a detectron2 model on a new dataset

You can make a copy of this tutorial or use "File -> Open in playground mode" to play with it yourself.


# Install detectron2

In [None]:
# Install the build dependencies used by this notebook: Cython, a pinned PyYAML (5.1)
# and the COCO Python API built from its GitHub repository.
# NOTE(review): the earlier remark "use cu101 because colab has CUDA 10.1" does not match
# anything in this cell -- no CUDA-specific wheel is requested here; confirm and drop or restore.
!pip3 install --upgrade cython pyyaml==5.1
!pip3 install --upgrade 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
import torch, torchvision
# Sanity check: print the installed torch version and whether CUDA is usable from torch.
print(torch.__version__, torch.cuda.is_available())
# Print the compiler version available for building from source.
!gcc --version
# opencv is pre-installed on colab

In [None]:
# Install detectron2 from source.
# The URL carries no branch, tag or commit, so pip builds whatever the repository's
# default branch currently points to (the result is not pinned to a specific version).
!pip3 install --upgrade 'git+https://github.com/facebookresearch/detectron2.git'

In [1]:
# You may need to restart your runtime prior to this, to let your installation take effect
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import math
import cv2
import random

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

# Run a pre-trained detectron2 model

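We first create a detectron2 config and a detectron2 `DefaultPredictor`; the image itself is loaded in the cell after that (a sample image from the COCO dataset can be downloaded with the commented-out `wget` command there):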

In [2]:
# Build the inference configuration on top of detectron2's defaults and wrap it
# in a DefaultPredictor.
# The same model-zoo entry is used for both the config file and the weights.
model_zoo_entry = "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(model_zoo.get_config_file(model_zoo_entry))

# Weights are resolved from detectron2's model zoo; a direct
# https://dl.fbaipublicfiles... url can be assigned here as well.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_zoo_entry)

# Score threshold for this model's ROI heads at test time.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7

predictor = DefaultPredictor(cfg)

In [None]:
# Load the image to analyse from the working directory and display it.
# To try a sample COCO image instead, download it first and point `image_path`
# at the downloaded file (the command below saves it as input.jpg, not cov.jpg):
# !wget http://images.cocodataset.org/val2017/000000439715.jpg -O input.jpg
image_path = "./cov.jpg"
im = cv2.imread(image_path)

# cv2.imread returns None instead of raising when the file is missing or cannot
# be decoded; fail here with a clear message rather than later in the pipeline.
if im is None:
    raise FileNotFoundError("Could not read image: {}".format(image_path))

# NOTE(review): cv2_imshow is not imported in any visible cell; on Colab it is
# provided by google.colab.patches -- confirm it is in scope before running.
cv2_imshow(im)

Then, we run the detectron2 `DefaultPredictor` on this image and keep only the person detections (class 0).

In [None]:
# Run the predictor on the loaded image.
# See https://detectron2.readthedocs.io/tutorials/models.html#model-output-format for the output specification.
outputs = predictor(im)

# Keep only the detections whose predicted class index is 0 (person).
instances = outputs["instances"]
is_person = instances.pred_classes == 0
person_instances = instances[is_person]

boxes = person_instances.pred_boxes
pred_cls = person_instances.pred_classes

In [None]:
# Debug peek at the person boxes.
print(boxes)

# Take the box at index 2 and print its element at index 1.
# NOTE(review): this assumes at least three persons were detected, and that
# index 1 is y1 under an (x1, y1, x2, y2) layout -- confirm.
third_box = boxes[2]
it = next(iter(third_box))
print(it[1])

In [None]:
# Derive the "too close" threshold from the largest person box:
# threshold = ratio * max(box[3] - box[1]) over all person boxes
# (presumably the box height under an (x1, y1, x2, y2) layout -- confirm).
ratio = 0.4

tallest = 0
for box in person_instances.pred_boxes:
    tallest = max(tallest, int(box[3] - box[1]))

minm_dist = int(tallest * ratio)
print(minm_dist)

# Centre point of every person box, used below as that person's position.
mid_points = person_instances.pred_boxes.get_centers()
print(mid_points)

In [None]:
# Use `Visualizer` to draw the person predictions on the image, mark every box
# centre, and join each pair of centres whose distance is at most `minm_dist`.
color = (1, 0, 0)  # colour passed to draw_line for the linking lines
close = 0          # number of pairs found within the threshold

vis = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
v = vis.draw_instance_predictions(person_instances.to("cpu"))

num_people = len(mid_points)

# Mark the centre of each detection.
for idx in range(num_people):
    cx, cy = mid_points[idx]
    v = vis.draw_circle((int(cx), int(cy)), color=(0,1,0))

# Compare every unordered pair of centres exactly once.
for a in range(num_people):
    ax, ay = mid_points[a]
    for b in range(a + 1, num_people):
        bx, by = mid_points[b]
        euclid_dist = math.sqrt((ax - bx)**2 + (ay - by)**2)

        if euclid_dist <= minm_dist:
            close += 1
            xs = [int(ax.item()), int(bx.item())]
            ys = [int(ay.item()), int(by.item())]
            v = vis.draw_line(xs, ys, color)

# NOTE(review): cv2_imshow is not imported in any visible cell -- confirm it is in scope.
cv2_imshow(v.get_image()[:, :, ::-1])

# Inference Video

In [3]:
def object_detection_api(pred, frame, rect_th=3, text_size=0.5, text_th=2, minm_dist = 0):
    """Draw detections, box centres and "too close" links on one frame.

    Relies on the module-level names ``cfg``, ``circle_col`` and ``line_col``,
    which must be defined before this function is called.

    Args:
        pred: Detections to draw. Must provide ``to("cpu")`` and
            ``pred_boxes.get_centers()`` (presumably a detectron2
            ``Instances`` object -- confirm against the caller).
        frame: Image array. Its last axis is reversed before it is handed to
            the ``Visualizer`` (presumably a BGR frame from OpenCV -- confirm).
        rect_th: Unused; kept so existing callers keep working.
        text_size: Unused; kept so existing callers keep working.
        text_th: Unused; kept so existing callers keep working.
        minm_dist: Two box centres are joined by a line when their Euclidean
            distance is strictly less than this value. With the default of 0
            no lines are drawn.

    Returns:
        The rendered image with its last axis reversed again, so it has the
        same channel order as ``frame``.
    """
    # Move the detections to the CPU once and turn the box centres into plain
    # Python floats, so the pairwise loop below does no per-element tensor
    # indexing (and no repeated device synchronisation).
    person_instances = pred.to("cpu")
    centers = person_instances.pred_boxes.get_centers().tolist()

    vis = Visualizer(frame[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.0)

    # draw the rectangle boxes
    v = vis.draw_instance_predictions(person_instances)

    # draw the circle mid point
    for cx, cy in centers:
        v = vis.draw_circle((int(cx), int(cy)), color=circle_col)

    # draw the lines that disobey social distance (each unordered pair once)
    for idx, (x1, y1) in enumerate(centers):
        for x2, y2 in centers[idx + 1:]:
            if math.hypot(x1 - x2, y1 - y2) < minm_dist:
                v = vis.draw_line([int(x1), int(x2)], [int(y1), int(y2)], line_col)

    return v.get_image()[:, :, ::-1]

In [5]:
# Run person detection on every frame of the input video, annotate each frame
# with object_detection_api and write the result to a new video file.
cap = cv2.VideoCapture('inp3.mp4')

# Define the codec and create VideoWriter object.
# 'mp4v' is the fourcc that matches the .mp4 container of the output file
# ('XVID' is an AVI-oriented tag).
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
# Keep the source frame rate so the output plays at the original speed; fall
# back to 20 fps when the capture backend does not report one (returns 0).
fps = cap.get(cv2.CAP_PROP_FPS) or 20.0
out = cv2.VideoWriter('outputinpvid2.mp4', fourcc, fps, frame_size)

# Set parameters
ratio = 0.5  # fraction of the largest box extent used as the distance threshold

line_col = (1, 0, 0)
circle_col = (0,1,0)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream or read failure.
            break

        outputs = predictor(frame)
        instances = outputs["instances"]
        # Keep only detections whose predicted class index is 0 (person).
        person_instances = instances[instances.pred_classes == 0]

        # Largest (box[3] - box[1]) over all person boxes in this frame.
        max_height = 0
        for box in person_instances.pred_boxes:
            max_height = max(max_height, int(box[3] - box[1]))

        # Apply the ratio exactly once, after the maximum is known. Scaling
        # inside the loop would compound the ratio per box and make the
        # threshold depend on the order of the detections.
        minm_dist = int(max_height * ratio)

        frame = object_detection_api(person_instances, frame, minm_dist = minm_dist)

        out.write(frame)

        cv2.imshow('frame',frame)

        # Allow the user to stop early by pressing 'q'.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release everything when the job is finished, even if a frame failed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()