TODO: try the structural similarity index (SSIM) — originally noted as "structural integrity index".

In [564]:
import numpy as np
import cv2

In [565]:
# Size (in pixels) of the square blob handed to the network by cv2.dnn.blobFromImage.
INPUT_WIDTH, INPUT_HEIGHT = 640, 640

# A prediction row is only considered when its confidence entry (index 4) reaches this value.
MIN_CONFIDENCE = 0.4
# Overlap threshold handed to cv2.dnn.NMSBoxes.
NON_MAXIMUM_SUPPRESION_THRESH = 0.45
# Lower bound for the best class score of a row; also used as the NMS score threshold.
SCORE_TRESH = 0.25

# Names of the classes whose detections are kept.
CLASSES = ['bicycle', 'car', 'motorbike', 'bus', 'truck']

# One four-corner polygon per lane, each stored as an int32 array of shape
# (4, 1, 2) so it can be passed straight to cv2.polylines.
LANES = [
    np.array(corners, np.int32).reshape((-1, 1, 2))
    for corners in (
        [[258, 408], [105, 201], [113, 125], [408, 388]],
        [[408, 388], [88, 98], [98, 51], [528, 378]],
        [[528, 378], [48, 8], [78, 0], [638, 368]],
        [[1142, 373], [1918, 292], [1915, 318], [1188, 393]],
        [[1188, 393], [1915, 318], [1918, 343], [1243, 415]],
        [[1243, 415], [1918, 343], [1918, 373], [1295, 440]],
        [[1418, 618], [1918, 873], [1645, 878], [1243, 643]],
        [[1243, 643], [1645, 878], [1398, 878], [1083, 664]],
        [[1083, 664], [1398, 878], [1183, 878], [948, 683]],
    )
]

In [566]:
# pad the image to a square; scaling to the network input size happens later
def format_yolov5(image):
    """Copy a 3-channel image onto the top-left corner of a black square canvas.

    The canvas is a zero-filled ``uint8`` array whose side length equals the
    larger of the image's height and width. The image itself is not resized.

    Parameters
    ----------
    image : array with a three-element shape ``(rows, cols, channels)``.

    Returns
    -------
    numpy.ndarray of shape ``(side, side, 3)`` and dtype ``uint8``.
    """
    n_rows, n_cols, _ = image.shape
    side = max(n_cols, n_rows)

    canvas = np.zeros((side, side, 3), np.uint8)
    # Everything right of / below the pasted image stays black.
    canvas[:n_rows, :n_cols] = image
    return canvas

In [567]:
def get_preds(image, net):
    """Run ``net`` on ``image`` and return the raw forward-pass output.

    The image is first padded to a square with ``format_yolov5``, then turned
    into a blob scaled by 1/255, resized to ``(INPUT_WIDTH, INPUT_HEIGHT)`` and
    with the red and blue channels swapped.

    Parameters
    ----------
    image : image array as accepted by ``format_yolov5``.
    net : network object exposing ``setInput`` and ``forward``.

    Returns
    -------
    Whatever ``net.forward()`` returns.
    """
    square_image = format_yolov5(image)
    net.setInput(
        cv2.dnn.blobFromImage(square_image, 1/255.0, (INPUT_WIDTH, INPUT_HEIGHT), swapRB=True)
    )
    predictions = net.forward()
    return predictions

In [568]:
def classnames_to_ids(classnames):
    """Return the indices of ``classnames`` whose entry is listed in ``CLASSES``.

    Indices are returned in ascending order.
    """
    wanted_ids = []
    for position, name in enumerate(classnames):
        if name in CLASSES:
            wanted_ids.append(position)
    return wanted_ids

In [569]:
def get_classes(path="assets/classes.txt"):
    """Read class names from a text file, one name per line.

    Each line is stripped of leading and trailing whitespace; the position of
    a name in the returned list is its line position in the file.

    Parameters
    ----------
    path : location of the text file.

    Returns
    -------
    list of str
    """
    with open(path, "r") as handle:
        names = [line.strip() for line in handle]
    return names

In [570]:
# step 3 - unwrap the predictions to get the object detections
def unwrap_detections(
    predictions,
    formatted_image,
    image,
    classes_path="assets/classes.txt",
    show_intermediary=False
):
    """Convert raw network output into bounding boxes for the classes in ``CLASSES``.

    Parameters
    ----------
    predictions : network output; only ``predictions[0]`` is used. It is
        treated as a 2-D numpy array whose rows are read as
        ``[x_center, y_center, width, height, confidence, class scores...]``.
    formatted_image : the square-padded image; only its shape is used, to
        scale boxes from network-input coordinates to image pixels.
    image : image that is drawn on IN PLACE and displayed when
        ``show_intermediary`` is true; untouched otherwise.
    classes_path : text file with one class name per line, ordered by class id.
    show_intermediary : when true, draw the lanes and the kept detections on
        ``image``, open a window and block until a key is pressed.

    Returns
    -------
    list of numpy arrays ``[left, top, width, height]`` (integers, in
    ``formatted_image`` pixels), one per detection that survived
    non-maximum suppression and belongs to one of ``CLASSES``.
    """
    output_data = predictions[0]

    # ndarray.shape is (rows, cols, ...), i.e. height first, then width.
    image_height, image_width = formatted_image.shape[:2]
    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT

    class_ids = []
    confidences = []
    boxes = []

    # Use however many rows the model actually produced instead of assuming
    # a fixed count, and drop low-confidence rows in one vectorised step.
    candidates = output_data[output_data[:, 4] >= MIN_CONFIDENCE]

    for row in candidates:
        classes_scores = row[5:]
        class_id = int(np.argmax(classes_scores))
        if classes_scores[class_id] > SCORE_TRESH:
            confidences.append(float(row[4]))
            class_ids.append(class_id)

            # Rows store the box centre and size; convert to top-left + size.
            x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()
            left = int((x - 0.5 * w) * x_factor)
            top = int((y - 0.5 * h) * y_factor)
            width = int(w * x_factor)
            height = int(h * y_factor)
            boxes.append(np.array([left, top, width, height]))

    class_list = get_classes(classes_path)
    desired_classes_ids = set(classnames_to_ids(class_list))

    # NOTE(review): suppression runs over ALL classes before the class filter,
    # so a wanted box can be suppressed by an overlapping unwanted one.
    # Behaviour kept as-is; confirm whether filtering first is preferable.
    if boxes:
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, SCORE_TRESH, NON_MAXIMUM_SUPPRESION_THRESH)
        # Depending on the OpenCV version the result is a flat array, an Nx1
        # array or an empty tuple; flatten so each index is a plain integer.
        kept_indexes = np.asarray(indexes, dtype=int).reshape(-1)
    else:
        kept_indexes = []

    result_class_ids = []
    result_boxes = []
    for i in kept_indexes:
        if class_ids[i] in desired_classes_ids:
            result_class_ids.append(class_ids[i])
            result_boxes.append(boxes[i])

    if show_intermediary:
        # Lanes do not depend on the detections: draw them once, and draw
        # them even when nothing was detected.
        for point_set in LANES:
            cv2.polylines(image, [point_set], True, (0, 255, 255), 2)

        for box, class_id in zip(result_boxes, result_class_ids):
            cv2.rectangle(image, box, (0, 255, 255))
            # Filled strip above the box that serves as the label background.
            cv2.rectangle(image, (box[0], box[1] - 20), (box[0] + box[2], box[1]), (0, 255, 255), -1)
            cv2.putText(image, class_list[class_id], (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,0,0))

        cv2.imshow("output", image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return result_boxes

In [571]:
# Run the detector on a single sample frame and display the annotated result.
image_path = '../train/Task1/01_2.jpg'
image = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file is missing or
# unreadable; fail here with a clear message rather than deep inside
# format_yolov5.
if image is None:
    raise FileNotFoundError("could not read image: " + image_path)
formatted_image = format_yolov5(image) # making the image square
net = cv2.dnn.readNet('assets/yolov5s.onnx')
preds = get_preds(image, net)
bounding_boxes = unwrap_detections(preds, formatted_image, image, show_intermediary=True)
# Optional debugging aid (disabled): show the cropped region of every returned box.
# for bb in bounding_boxes:
#     x, y, w, h = bb
#     roi = image[y:y+h, x:x+w] 
#     cv2.imshow('test', roi)
#     cv2.waitKey(0)
#     cv2.destroyAllWindows()