In [None]:
!pip3 install opencv-python  > /dev/null
!pip3 install albumentations > /dev/null

# Master

In [72]:
# Imports
import numpy as np
from PIL import Image
from torchvision import transforms as T
import cv2 as cv
import torch
import torch.utils.data
import os
import albumentations as A

# Runtime setup
# KMP_DUPLICATE_LIB_OK is read by the Intel OpenMP runtime; presumably set here
# to avoid the "duplicate OpenMP library" abort when torch and OpenCV/numpy
# each bundle their own copy -- TODO confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# Use the GPU when one is visible to torch, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Drawing parameters used by draw_box
BOX_COLOR = (0, 0, 225)         # Red (OpenCV colours are given in B, G, R order)
BOX_THICKESS = 5                # Rectangle line thickness passed to cv.rectangle
TEXT_COLOR = (255, 255, 255)    # White
TEXT_SIZE = 0.9                 # Font scale passed to cv.putText
TEXT_FONT = 0                   # Font face id passed to cv.putText
IMG_SIZE = (200, 200)

# Maps the integer class ids produced by the detector to readable names.
category_id_to_name = {
    1: "dent",
    2: "broken_glass",
    3: "deflated_wheel",
    4: "scratch",
    5: "broken_headlight",
}

# Converts an image array / PIL image into a torch tensor.
transform_to_tensor = T.Compose([T.ToTensor()])


def get_training_augmentation(_src, _bboxes, _category_id, _size):
    """Resize an image together with its boxes and return the resized boxes.

    Args:
        _src: Image handed to the albumentations pipeline as ``image``.
        _bboxes: Bounding boxes in COCO format.
        _category_id: Labels for ``_bboxes``, passed as the ``category_id``
            label field.
        _size: Two-element sequence; ``_size[0]`` and ``_size[1]`` are given
            to ``A.Resize`` in that order.

    Returns:
        The ``'bboxes'`` entry of the pipeline output. The resized image
        itself is discarded.
    """
    first_dim, second_dim = _size[0], _size[1]
    resize_pipeline = A.Compose(
        [A.Resize(first_dim, second_dim)],
        bbox_params=A.BboxParams(format='coco', label_fields=['category_id']),
    )
    augmented = resize_pipeline(
        image=_src,
        bboxes=_bboxes,
        category_id=_category_id,
    )
    return augmented['bboxes']


def tensor_to_image(tensor):
    """Convert an array-like of values into a PIL image.

    The input is multiplied by 255 and cast to ``uint8``. If the result has
    more than three dimensions, the leading dimension must be 1 and is
    dropped before building the image.

    Args:
        tensor: Array-like supporting ``* 255`` and conversion via
            ``np.array`` (presumably values in [0, 1] -- verify against
            callers).

    Returns:
        A ``PIL.Image.Image`` built with ``Image.fromarray``.

    Raises:
        AssertionError: If the array has more than three dimensions and its
            first dimension is not 1.
    """
    pixels = np.array(tensor * 255, dtype=np.uint8)

    # Up to three dimensions: the array can be handed to PIL directly.
    if np.ndim(pixels) <= 3:
        return Image.fromarray(pixels)

    # More than three dimensions: only a single-item batch is accepted.
    assert pixels.shape[0] == 1
    return Image.fromarray(pixels[0])


def draw_box(_image, _left, _top, _right, _bottom, _name):
    """Draw a labelled rectangle on an image and return the image.

    The rectangle spans (``_left``, ``_top``) to (``_right``, ``_bottom``) and
    is drawn with ``BOX_COLOR`` / ``BOX_THICKESS``. The label ``_name`` is
    written 10 pixels to the right of the box, at the height of its bottom
    edge, using ``TEXT_FONT`` / ``TEXT_SIZE`` / ``TEXT_COLOR``.

    Args:
        _image: Image passed to the OpenCV drawing calls.
        _left, _top, _right, _bottom: Box corner coordinates.
        _name: Text to write next to the box.

    Returns:
        The same ``_image`` object that was passed in.
    """
    top_left = (_left, _top)
    bottom_right = (_right, _bottom)
    label_origin = (_right + 10, _bottom)

    cv.rectangle(_image, top_left, bottom_right, BOX_COLOR, BOX_THICKESS)
    cv.putText(_image, _name, label_origin, TEXT_FONT, TEXT_SIZE, TEXT_COLOR)
    return _image


def _detect_damage(model, frame):
    """Run the detector on one frame and return the confident detections.

    Args:
        model: Callable detector; it is called with a list holding one image
            tensor and must return a list whose first item is a mapping with
            ``'boxes'``, ``'scores'`` and ``'labels'``.
        frame: Image as returned by ``cv.VideoCapture.read``.

    Returns:
        A list of ``(left, top, right, bottom, name)`` tuples, one for each
        detection whose score is greater than ``TOLERANCE``.
    """
    # NOTE(review): OpenCV frames are in BGR channel order and are passed to
    # the model unchanged -- confirm this matches how the model was trained.
    # The tensor is moved to `device` because the model is loaded there.
    frame_tensor = transform_to_tensor(frame).to(device)

    with torch.no_grad():
        prediction = model([frame_tensor])[0]

    detections = []
    for box, score, label in zip(prediction['boxes'], prediction['scores'], prediction['labels']):
        if float(score) > TOLERANCE:
            left, top, right, bottom = (int(coord) for coord in box)
            label_id = int(label)
            # Fall back to the numeric id so an unmapped class never reaches
            # cv.putText as None.
            name = category_id_to_name.get(label_id, str(label_id))
            detections.append((left, top, right, bottom, name))
    return detections


def show_webcam(mirror=False):
    """Display a video source with detected damage outlined, until it ends or ESC is pressed.

    Loads the model from ``PATH`` onto ``device``, opens ``VIDEO_PATH`` with
    OpenCV and shows every frame in a window titled ``'my webcam'``.

    Optimization trick: neighbouring frames of a camera or video stream show
    the same objects at roughly the same place, so the predictor only runs on
    every ``FRAME_PRED``-th frame. On the frames in between, the boxes of the
    last predicted frame are redrawn (when ``STREAMING`` is true), which keeps
    the display rate up.

    Args:
        mirror: If true, flip every frame horizontally before processing.

    Returns:
        None. Prints ``"Done"`` when the loop has finished.
    """
    # NOTE: torch.load unpickles the file, which can execute arbitrary code;
    # only point PATH at model files from a trusted source.
    model = torch.load(PATH, map_location=device)
    model.eval()

    cam = cv.VideoCapture(VIDEO_PATH)
    last_bboxes = []
    frame_id = 0
    try:
        frame_exist = cam.isOpened()
        while frame_exist:
            frame_exist, img = cam.read()
            if not frame_exist:
                break
            if mirror:
                img = cv.flip(img, 1)

            if frame_id % FRAME_PRED == 0:
                # Prediction frame: refresh the remembered boxes (this also
                # empties them when nothing confident was found).
                last_bboxes = _detect_damage(model, img)
                boxes_to_draw = last_bboxes
            elif STREAMING:
                # Skipped frame: reuse the boxes from the last prediction.
                boxes_to_draw = last_bboxes
            else:
                boxes_to_draw = []

            for left, top, right, bottom, name in boxes_to_draw:
                img = draw_box(img, left, top, right, bottom, name)

            cv.imshow('my webcam', img)

            if cv.waitKey(1) == 27:
                break  # esc to quit

            # Must advance once per frame; otherwise every frame counts as
            # frame 0 and the predictor runs on all of them.
            frame_id += 1
    finally:
        # Free the capture device/file and close the window even on errors.
        cam.release()
        cv.destroyAllWindows()
    print("Done")
    return


# Run configuration read by show_webcam
STREAMING = True                    # On frames the predictor skips, redraw the boxes from the last predicted frame
FRAME_PRED = 10                     # Prediction interval: the predictor runs on frames whose index is a multiple of this
TOLERANCE = 0.7                     # A detection is drawn only if its score is greater than this
PATH = "include/model_70.pt"        # Path to predictor model


# Input source: keep exactly one line uncommented (a file path, or 0 for the camera).
#VIDEO_PATH = "data/IMG_5367.MOV"
#VIDEO_PATH = "data/testcam.png"
#VIDEO_PATH = "data/car.jpeg"
#VIDEO_PATH = "data/carb.jpg"
#VIDEO_PATH = "data/car_2.jpg"
VIDEO_PATH = "data/car3.jpg"
#VIDEO_PATH = 0                     # Camera


show_webcam()           # Main function

Done
